82 files changed, 2449 insertions, 803 deletions
diff --git a/.gitlab-ci.d/base.yml b/.gitlab-ci.d/base.yml index 2fbb58d2a3..188a770799 100644 --- a/.gitlab-ci.d/base.yml +++ b/.gitlab-ci.d/base.yml @@ -1,10 +1,24 @@ +variables: + # On stable branches this is changed by later rules. Should also + # be overridden per pipeline if running pipelines concurrently + # for different branches in contributor forks. + QEMU_CI_CONTAINER_TAG: latest + + # For purposes of CI rules, upstream is the gitlab.com/qemu-project + # namespace. When testing CI, it might be usefult to override this + # to point to a fork repo + QEMU_CI_UPSTREAM: qemu-project + # The order of rules defined here is critically important. # They are evaluated in order and first match wins. # # Thus we group them into a number of stages, ordered from # most restrictive to least restrictive # +# For pipelines running for stable "staging-X.Y" branches +# we must override QEMU_CI_CONTAINER_TAG +# .base_job_template: variables: # Each script line from will be in a collapsible section in the job output @@ -19,28 +33,36 @@ # want jobs to run ############################################################# + # Never run jobs upstream on stable branch, staging branch jobs already ran + - if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /^stable-/' + when: never + + # Never run jobs upstream on tags, staging branch jobs already ran + - if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_TAG' + when: never + # Cirrus jobs can't run unless the creds / target repo are set - if: '$QEMU_JOB_CIRRUS && ($CIRRUS_GITHUB_REPO == null || $CIRRUS_API_TOKEN == null)' when: never # Publishing jobs should only run on the default branch in upstream - - if: '$QEMU_JOB_PUBLISH == "1" && $CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH' + - if: '$QEMU_JOB_PUBLISH == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH' when: never # Non-publishing jobs should only run on staging branches in upstream - - if: '$QEMU_JOB_PUBLISH != "1" && $CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/' + - if: '$QEMU_JOB_PUBLISH != "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH !~ /staging/' when: never # Jobs only intended for forks should always be skipped on upstream - - if: '$QEMU_JOB_ONLY_FORKS == "1" && $CI_PROJECT_NAMESPACE == "qemu-project"' + - if: '$QEMU_JOB_ONLY_FORKS == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM' when: never # Forks don't get pipelines unless QEMU_CI=1 or QEMU_CI=2 is set - - if: '$QEMU_CI != "1" && $QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != "qemu-project"' + - if: '$QEMU_CI != "1" && $QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM' when: never # Avocado jobs don't run in forks unless $QEMU_CI_AVOCADO_TESTING is set - - if: '$QEMU_JOB_AVOCADO && $QEMU_CI_AVOCADO_TESTING != "1" && $CI_PROJECT_NAMESPACE != "qemu-project"' + - if: '$QEMU_JOB_AVOCADO && $QEMU_CI_AVOCADO_TESTING != "1" && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM' when: never @@ -50,17 +72,29 @@ ############################################################# # Optional jobs should not be run unless manually triggered + - if: '$QEMU_JOB_OPTIONAL && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/' + when: manual + allow_failure: true + variables: + QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG + - if: '$QEMU_JOB_OPTIONAL' when: manual allow_failure: true # Skipped jobs should not be run unless manually triggered + - 
if: '$QEMU_JOB_SKIPPED && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/' + when: manual + allow_failure: true + variables: + QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG + - if: '$QEMU_JOB_SKIPPED' when: manual allow_failure: true # Avocado jobs can be manually start in forks if $QEMU_CI_AVOCADO_TESTING is unset - - if: '$QEMU_JOB_AVOCADO && $CI_PROJECT_NAMESPACE != "qemu-project"' + - if: '$QEMU_JOB_AVOCADO && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM' when: manual allow_failure: true @@ -72,8 +106,23 @@ # Forks pipeline jobs don't start automatically unless # QEMU_CI=2 is set - - if: '$QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != "qemu-project"' + - if: '$QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM' + when: manual + + # Upstream pipeline jobs start automatically unless told not to + # by setting QEMU_CI=1 + - if: '$QEMU_CI == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/' + when: manual + variables: + QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG + + - if: '$QEMU_CI == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM' when: manual # Jobs can run if any jobs they depend on were successful + - if: '$QEMU_JOB_SKIPPED && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/' + when: on_success + variables: + QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG + - when: on_success diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml index 76ff1dfcb6..5da61f4277 100644 --- a/.gitlab-ci.d/buildtest-template.yml +++ b/.gitlab-ci.d/buildtest-template.yml @@ -1,7 +1,7 @@ .native_build_job_template: extends: .base_job_template stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG before_script: - JOBS=$(expr $(nproc) + 1) script: @@ -40,7 +40,7 @@ .common_test_job_template: extends: .base_job_template stage: test - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG script: - scripts/git-submodule.sh update roms/SLOF - meson subprojects download $(cd build/subprojects && echo *) diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index 1922caf536..aa833b62ca 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -532,7 +532,7 @@ build-without-defaults: build-libvhost-user: extends: .base_job_template stage: build - image: $CI_REGISTRY_IMAGE/qemu/fedora:latest + image: $CI_REGISTRY_IMAGE/qemu/fedora:$QEMU_CI_CONTAINER_TAG needs: job: amd64-fedora-container script: @@ -572,7 +572,7 @@ build-tools-and-docs-debian: # of what topic branch they're currently using pages: extends: .base_job_template - image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:latest + image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:$QEMU_CI_CONTAINER_TAG stage: test needs: - job: build-tools-and-docs-debian diff --git a/.gitlab-ci.d/container-template.yml b/.gitlab-ci.d/container-template.yml index 77aa839e9e..4eec72f383 100644 --- a/.gitlab-ci.d/container-template.yml +++ b/.gitlab-ci.d/container-template.yml @@ -5,7 +5,8 @@ services: - docker:dind before_script: - - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest" + - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:$QEMU_CI_CONTAINER_TAG" + # Always ':latest' because we always use upstream as a common cache source - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/qemu/$NAME:latest" - docker login $CI_REGISTRY -u 
"$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" - until docker info; do sleep 1; done diff --git a/.gitlab-ci.d/crossbuild-template.yml b/.gitlab-ci.d/crossbuild-template.yml index 4f93b9e4e5..6efb0d2a54 100644 --- a/.gitlab-ci.d/crossbuild-template.yml +++ b/.gitlab-ci.d/crossbuild-template.yml @@ -1,7 +1,7 @@ .cross_system_build_job: extends: .base_job_template stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG timeout: 80m script: - mkdir build @@ -27,7 +27,7 @@ .cross_accel_build_job: extends: .base_job_template stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG timeout: 30m script: - mkdir build @@ -39,7 +39,7 @@ .cross_user_build_job: extends: .base_job_template stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG script: - mkdir build - cd build diff --git a/.gitlab-ci.d/static_checks.yml b/.gitlab-ci.d/static_checks.yml index b4cbdbce2a..ad9f426a52 100644 --- a/.gitlab-ci.d/static_checks.yml +++ b/.gitlab-ci.d/static_checks.yml @@ -26,7 +26,7 @@ check-dco: check-python-minreqs: extends: .base_job_template stage: test - image: $CI_REGISTRY_IMAGE/qemu/python:latest + image: $CI_REGISTRY_IMAGE/qemu/python:$QEMU_CI_CONTAINER_TAG script: - make -C python check-minreqs variables: @@ -37,7 +37,7 @@ check-python-minreqs: check-python-tox: extends: .base_job_template stage: test - image: $CI_REGISTRY_IMAGE/qemu/python:latest + image: $CI_REGISTRY_IMAGE/qemu/python:$QEMU_CI_CONTAINER_TAG script: - make -C python check-tox variables: diff --git a/.gitlab-ci.d/windows.yml b/.gitlab-ci.d/windows.yml index 472bacd2e2..f889a468b5 100644 --- a/.gitlab-ci.d/windows.yml +++ b/.gitlab-ci.d/windows.yml @@ -11,6 +11,13 @@ needs: [] stage: build timeout: 80m + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + expire_in: 7 days + paths: + - build/meson-logs/testlog.txt + reports: + junit: "build/meson-logs/testlog.junit.xml" before_script: - If ( !(Test-Path -Path msys64\var\cache ) ) { mkdir msys64\var\cache @@ -65,8 +72,8 @@ msys2-64bit: - $env:CHERE_INVOKING = 'yes' # Preserve the current working directory - $env:MSYSTEM = 'MINGW64' # Start a 64-bit MinGW environment - $env:MSYS = 'winsymlinks:native' # Enable native Windows symlink - - mkdir output - - cd output + - mkdir build + - cd build # Note: do not remove "--without-default-devices"! 
# commit 9f8e6cad65a6 ("gitlab-ci: Speed up the msys2-64bit job by using --without-default-devices" # changed to compile QEMU with the --without-default-devices switch @@ -115,8 +122,8 @@ msys2-32bit: - $env:CHERE_INVOKING = 'yes' # Preserve the current working directory - $env:MSYSTEM = 'MINGW32' # Start a 32-bit MinGW environment - $env:MSYS = 'winsymlinks:native' # Enable native Windows symlink - - mkdir output - - cd output + - mkdir build + - cd build - ..\msys64\usr\bin\bash -lc '../configure --target-list=ppc64-softmmu --enable-fdt=system' - ..\msys64\usr\bin\bash -lc 'make' diff --git a/MAINTAINERS b/MAINTAINERS index 7f323cd2eb..e07746ac7d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -302,6 +302,7 @@ M: Daniel Henrique Barboza <danielhb413@gmail.com> R: Cédric Le Goater <clg@kaod.org> R: David Gibson <david@gibson.dropbear.id.au> R: Greg Kurz <groug@kaod.org> +R: Nicholas Piggin <npiggin@gmail.com> L: qemu-ppc@nongnu.org S: Odd Fixes F: target/ppc/ @@ -1451,6 +1452,8 @@ F: tests/avocado/ppc_pseries.py PowerNV (Non-Virtualized) M: Cédric Le Goater <clg@kaod.org> +R: Frédéric Barrat <fbarrat@linux.ibm.com> +R: Nicholas Piggin <npiggin@gmail.com> L: qemu-ppc@nongnu.org S: Odd Fixes F: docs/system/ppc/powernv.rst @@ -2445,6 +2448,7 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next XIVE M: Cédric Le Goater <clg@kaod.org> +R: Frédéric Barrat <fbarrat@linux.ibm.com> L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/*/*xive* @@ -83,16 +83,17 @@ config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/scripts/meson-buildoptions.sh @if test -f meson-private/coredata.dat; then \ ./config.status --skip-meson; \ else \ - ./config.status && touch build.ninja.stamp; \ + ./config.status; \ fi # 2. meson.stamp exists if meson has run at least once (so ninja reconfigure # works), but otherwise never needs to be updated + meson-private/coredata.dat: meson.stamp meson.stamp: config-host.mak @touch meson.stamp -# 3. ensure generated build files are up-to-date +# 3. ensure meson-generated build files are up-to-date ifneq ($(NINJA),) Makefile.ninja: build.ninja @@ -106,11 +107,19 @@ Makefile.ninja: build.ninja endif ifneq ($(MESON),) -# A separate rule is needed for Makefile dependencies to avoid -n +# The path to meson always points to pyvenv/bin/meson, but the absolute +# paths could change. In that case, force a regeneration of build.ninja. +# Note that this invocation of $(NINJA), just like when Make rebuilds +# Makefiles, does not include -n. 
build.ninja: build.ninja.stamp $(build-files): build.ninja.stamp: meson.stamp $(build-files) - $(MESON) setup --reconfigure $(SRC_PATH) && touch $@ + @if test "$$(cat build.ninja.stamp)" = "$(MESON)" && test -n "$(NINJA)"; then \ + $(NINJA) build.ninja; \ + else \ + echo "$(MESON) setup --reconfigure $(SRC_PATH)"; \ + $(MESON) setup --reconfigure $(SRC_PATH); \ + fi && echo "$(MESON)" > $@ Makefile.mtest: build.ninja scripts/mtest2make.py $(MESON) introspect --targets --tests --benchmarks | $(PYTHON) scripts/mtest2make.py > $@ diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 7679f397ae..9aa898db14 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)", kvm_arch_vcpu_id(cpu)); } + cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); + err: return ret; } @@ -4007,7 +4009,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd /* Read stats header */ kvm_stats_header = &descriptors->kvm_stats_header; - ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header)); + ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0); if (ret != sizeof(*kvm_stats_header)) { error_setg(errp, "KVM stats: failed to read stats header: " "expected %zu actual %zu", @@ -4038,7 +4040,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd } static void query_stats(StatsResultList **result, StatsTarget target, - strList *names, int stats_fd, Error **errp) + strList *names, int stats_fd, CPUState *cpu, + Error **errp) { struct kvm_stats_desc *kvm_stats_desc; struct kvm_stats_header *kvm_stats_header; @@ -4096,7 +4099,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, break; case STATS_TARGET_VCPU: add_stats_entry(result, STATS_PROVIDER_KVM, - current_cpu->parent_obj.canonical_path, + cpu->parent_obj.canonical_path, stats_list); break; default: @@ -4133,10 +4136,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list); } -static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) +static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) { - StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; - int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); + int stats_fd = cpu->kvm_vcpu_stats_fd; Error *local_err = NULL; if (stats_fd == -1) { @@ -4145,14 +4147,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) return; } query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, - kvm_stats_args->names, stats_fd, kvm_stats_args->errp); - close(stats_fd); + kvm_stats_args->names, stats_fd, cpu, + kvm_stats_args->errp); } -static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) +static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) { - StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; - int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); + int stats_fd = cpu->kvm_vcpu_stats_fd; Error *local_err = NULL; if (stats_fd == -1) { @@ -4162,7 +4163,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) } query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd, kvm_stats_args->errp); - close(stats_fd); } static void query_stats_cb(StatsResultList **result, StatsTarget target, @@ -4180,7 +4180,7 @@ static void query_stats_cb(StatsResultList **result, 
StatsTarget target, error_setg_errno(errp, errno, "KVM stats: ioctl failed"); return; } - query_stats(result, target, names, stats_fd, errp); + query_stats(result, target, names, stats_fd, NULL, errp); close(stats_fd); break; } @@ -4194,7 +4194,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) { continue; } - run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); + query_stats_vcpu(cpu, &stats_args); } break; } @@ -4220,6 +4220,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) if (first_cpu) { stats_args.result.schema = result; stats_args.errp = errp; - run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); + query_stats_schema_vcpu(first_cpu, &stats_args); } } diff --git a/bsd-user/i386/target_arch_cpu.h b/bsd-user/i386/target_arch_cpu.h index d792dc720f..9bf2c4244b 100644 --- a/bsd-user/i386/target_arch_cpu.h +++ b/bsd-user/i386/target_arch_cpu.h @@ -164,6 +164,10 @@ static inline void target_cpu_loop(CPUX86State *env) } break; + case EXCP_SYSCALL: + /* doesn't do anything */ + break; + case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; @@ -758,7 +758,7 @@ done if ! test -e "$source_path/.git" then - git_submodules_action="ignore" + git_submodules_action="validate" fi # test for any invalid configuration combinations @@ -1895,6 +1895,7 @@ if test "$skip_meson" = no; then if test "$?" -ne 0 ; then error_exit "meson setup failed" fi + echo "$meson" > build.ninja.stamp else if test -f meson-private/cmd_line.txt; then # Adjust old command line options that were removed diff --git a/docs/devel/ci-jobs.rst.inc b/docs/devel/ci-jobs.rst.inc index 1f28fec0d0..3f6802d51e 100644 --- a/docs/devel/ci-jobs.rst.inc +++ b/docs/devel/ci-jobs.rst.inc @@ -70,6 +70,17 @@ in a handful of namespaces repository CI settings, or as git push variables, to influence which jobs get run in a pipeline + * QEMU_CI_CONTAINER_TAG - the tag used to publish containers + in stage 1, for use by build jobs in stage 2. Defaults to + 'latest', but if running pipelines for different branches + concurrently, it should be overridden per pipeline. + + * QEMU_CI_UPSTREAM - gitlab namespace that is considered to be + the 'upstream'. This defaults to 'qemu-project'. Contributors + may choose to override this if they are modifying rules in + base.yml and need to validate how they will operate when in + an upstream context, as opposed to their fork context. + * nnn - other misc variables not falling into the above categories, or using different names for historical reasons and not yet converted. diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst index f4524b6d3e..6bb88a40c7 100644 --- a/docs/system/arm/cpu-features.rst +++ b/docs/system/arm/cpu-features.rst @@ -435,3 +435,26 @@ As with ``sve-default-vector-length``, if the default length is larger than the maximum vector length enabled, the actual vector length will be reduced. If this property is set to ``-1`` then the default vector length is set to the maximum possible length. + +RME CPU Properties +================== + +The status of RME support with QEMU is experimental. At this time we +only support RME within the CPU proper, not within the SMMU or GIC. +The feature is enabled by the CPU property ``x-rme``, with the ``x-`` +prefix present as a reminder of the experimental status, and defaults off. 
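As an illustration only (the ``virt`` board with ``secure=on`` and the ``max`` CPU model are assumptions here, not something this patch mandates), the property could be turned on from the command line along these lines::

  # Hypothetical example: 'max' CPU with the experimental RME extension
  # enabled; an EL3-capable configuration such as virt with secure=on
  # is assumed, since RME requires EL3.
  qemu-system-aarch64 -M virt,secure=on -cpu max,x-rme=on ...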
+ +The method for enabling RME will change in some future QEMU release +without notice or backward compatibility. + +RME Level 0 GPT Size Property +----------------------------- + +To aid firmware developers in testing different possible CPU +configurations, ``x-l0gptsz=S`` may be used to specify the value +to encode into ``GPCCR_EL3.L0GPTSZ``, a read-only field that +specifies the size of the Level 0 Granule Protection Table. +Legal values for ``S`` are 30, 34, 36, and 39; the default is 30. + +As with ``x-rme``, the ``x-l0gptsz`` property may be renamed or +removed in some future QEMU release. diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index ecbbd63adf..bdafc68819 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -66,6 +66,7 @@ the following architecture extensions: - FEAT_RAS (Reliability, availability, and serviceability) - FEAT_RASv1p1 (RAS Extension v1.1) - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) +- FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental) - FEAT_RNG (Random number generator) - FEAT_S2FWB (Stage 2 forced Write-Back) - FEAT_SB (Speculation Barrier) diff --git a/docs/system/arm/sbsa.rst b/docs/system/arm/sbsa.rst index f571fe645e..a8e0b530a2 100644 --- a/docs/system/arm/sbsa.rst +++ b/docs/system/arm/sbsa.rst @@ -46,6 +46,9 @@ to be a complete compliant DT. It currently reports: - platform version - GIC addresses +Platform version +'''''''''''''''' + The platform version is only for informing platform firmware about what kind of ``sbsa-ref`` board it is running on. It is neither a QEMU versioned machine type nor a reflection of the level of the @@ -54,3 +57,14 @@ SBSA/SystemReady SR support provided. The ``machine-version-major`` value is updated when changes breaking fw compatibility are introduced. The ``machine-version-minor`` value is updated when features are added that don't break fw compatibility. + +Platform version changes: + +0.0 + Devicetree holds information about CPUs, memory and platform version. + +0.1 + GIC information is present in devicetree. + +0.2 + GIC ITS information is present in devicetree. diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index de21200ff9..b774d80291 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -65,6 +65,7 @@ enum { SBSA_CPUPERIPHS, SBSA_GIC_DIST, SBSA_GIC_REDIST, + SBSA_GIC_ITS, SBSA_SECURE_EC, SBSA_GWDT_WS0, SBSA_GWDT_REFRESH, @@ -108,6 +109,7 @@ static const MemMapEntry sbsa_ref_memmap[] = { [SBSA_CPUPERIPHS] = { 0x40000000, 0x00040000 }, [SBSA_GIC_DIST] = { 0x40060000, 0x00010000 }, [SBSA_GIC_REDIST] = { 0x40080000, 0x04000000 }, + [SBSA_GIC_ITS] = { 0x44081000, 0x00020000 }, [SBSA_SECURE_EC] = { 0x50000000, 0x00001000 }, [SBSA_GWDT_REFRESH] = { 0x50010000, 0x00001000 }, [SBSA_GWDT_CONTROL] = { 0x50011000, 0x00001000 }, @@ -181,8 +183,15 @@ static void sbsa_fdt_add_gic_node(SBSAMachineState *sms) 2, sbsa_ref_memmap[SBSA_GIC_REDIST].base, 2, sbsa_ref_memmap[SBSA_GIC_REDIST].size); + nodename = g_strdup_printf("/intc/its"); + qemu_fdt_add_subnode(sms->fdt, nodename); + qemu_fdt_setprop_sized_cells(sms->fdt, nodename, "reg", + 2, sbsa_ref_memmap[SBSA_GIC_ITS].base, + 2, sbsa_ref_memmap[SBSA_GIC_ITS].size); + g_free(nodename); } + /* * Firmware on this machine only uses ACPI table to load OS, these limited * device tree nodes are just to let firmware know the info which varies from @@ -219,7 +228,7 @@ static void create_fdt(SBSAMachineState *sms) * fw compatibility. 
*/ qemu_fdt_setprop_cell(fdt, "/", "machine-version-major", 0); - qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 1); + qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 2); if (ms->numa_state->have_numa_distance) { int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t); @@ -409,7 +418,20 @@ static void create_secure_ram(SBSAMachineState *sms, memory_region_add_subregion(secure_sysmem, base, secram); } -static void create_gic(SBSAMachineState *sms) +static void create_its(SBSAMachineState *sms) +{ + const char *itsclass = its_class_name(); + DeviceState *dev; + + dev = qdev_new(itsclass); + + object_property_set_link(OBJECT(dev), "parent-gicv3", OBJECT(sms->gic), + &error_abort); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, sbsa_ref_memmap[SBSA_GIC_ITS].base); +} + +static void create_gic(SBSAMachineState *sms, MemoryRegion *mem) { unsigned int smp_cpus = MACHINE(sms)->smp.cpus; SysBusDevice *gicbusdev; @@ -436,6 +458,10 @@ static void create_gic(SBSAMachineState *sms) qdev_prop_set_uint32(sms->gic, "len-redist-region-count", 1); qdev_prop_set_uint32(sms->gic, "redist-region-count[0]", redist0_count); + object_property_set_link(OBJECT(sms->gic), "sysmem", + OBJECT(mem), &error_fatal); + qdev_prop_set_bit(sms->gic, "has-lpi", true); + gicbusdev = SYS_BUS_DEVICE(sms->gic); sysbus_realize_and_unref(gicbusdev, &error_fatal); sysbus_mmio_map(gicbusdev, 0, sbsa_ref_memmap[SBSA_GIC_DIST].base); @@ -482,6 +508,7 @@ static void create_gic(SBSAMachineState *sms) sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); } + create_its(sms); } static void create_uart(const SBSAMachineState *sms, int uart, @@ -788,7 +815,7 @@ static void sbsa_ref_init(MachineState *machine) create_secure_ram(sms, secure_sysmem); - create_gic(sms); + create_gic(sms, sysmem); create_uart(sms, SBSA_UART, sysmem, serial_hd(0)); create_uart(sms, SBSA_SECURE_UART, secure_sysmem, serial_hd(1)); @@ -883,6 +910,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; + /* platform instead of architectural choice */ + mc->cpu_cluster_has_numa_boundary = true; } static const TypeInfo sbsa_ref_info = { diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 9b9f7d9c68..3937e30477 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -3033,6 +3033,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->smp_props.clusters_supported = true; mc->auto_enable_numa_with_memhp = true; mc->auto_enable_numa_with_memdev = true; + /* platform instead of architectural choice */ + mc->cpu_cluster_has_numa_boundary = true; mc->default_ram_id = "mach-virt.ram"; mc->default_nic = "virtio-net-pci"; diff --git a/hw/core/machine.c b/hw/core/machine.c index 1000406211..46f8f9a2b0 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1262,6 +1262,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) g_string_free(s, true); } +static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + NumaState *state = ms->numa_state; + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + const CPUArchId *cpus = possible_cpus->cpus; + int i, j; + + if (state->num_nodes <= 1 || possible_cpus->len <= 1) { + return; + } + + /* + * The Linux scheduling domain can't 
be parsed when the multiple CPUs + * in one cluster have been associated with different NUMA nodes. However, + * it's fine to associate one NUMA node with CPUs in different clusters. + */ + for (i = 0; i < possible_cpus->len; i++) { + for (j = i + 1; j < possible_cpus->len; j++) { + if (cpus[i].props.has_socket_id && + cpus[i].props.has_cluster_id && + cpus[i].props.has_node_id && + cpus[j].props.has_socket_id && + cpus[j].props.has_cluster_id && + cpus[j].props.has_node_id && + cpus[i].props.socket_id == cpus[j].props.socket_id && + cpus[i].props.cluster_id == cpus[j].props.cluster_id && + cpus[i].props.node_id != cpus[j].props.node_id) { + warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 + " have been associated with node-%" PRId64 " and node-%" PRId64 + " respectively. It can cause OSes like Linux to" + " misbehave", i, j, cpus[i].props.socket_id, + cpus[i].props.cluster_id, cpus[i].props.node_id, + cpus[j].props.node_id); + } + } + } +} + MemoryRegion *machine_consume_memdev(MachineState *machine, HostMemoryBackend *backend) { @@ -1355,6 +1394,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * numa_complete_configuration(machine); if (machine->numa_state->num_nodes) { machine_numa_finish_cpu_init(machine); + if (machine_class->cpu_cluster_has_numa_boundary) { + validate_cpu_cluster_to_numa_boundary(machine); + } } } diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index b00a91ecfe..866e11d208 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -122,6 +122,7 @@ static FWCfgState *create_fw_cfg(MachineState *ms) { FWCfgState *fw_cfg; uint64_t val; + const char qemu_version[] = QEMU_VERSION; fw_cfg = fw_cfg_init_mem(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, ms->smp.cpus); @@ -147,6 +148,10 @@ static FWCfgState *create_fw_cfg(MachineState *ms) fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ms->boot_config.order[0]); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); + fw_cfg_add_file(fw_cfg, "/etc/qemu-version", + g_memdup(qemu_version, sizeof(qemu_version)), + sizeof(qemu_version)); + return fw_cfg; } @@ -418,10 +423,16 @@ static void hppa_machine_reset(MachineState *ms, ShutdownCause reason) /* Start all CPUs at the firmware entry point. * Monarch CPU will initialize firmware, secondary CPUs - * will enter a small idle look and wait for rendevouz. */ + * will enter a small idle loop and wait for rendevouz. */ for (i = 0; i < smp_cpus; i++) { - cpu_set_pc(CPU(cpu[i]), firmware_entry); + CPUState *cs = CPU(cpu[i]); + + cpu_set_pc(cs, firmware_entry); + cpu[i]->env.psw = PSW_Q; cpu[i]->env.gr[5] = CPU_HPA + i * 0x1000; + + cs->exception_index = -1; + cs->halted = 0; } /* already initialized by machine_hppa_init()? 
*/ diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index 622f9d28b7..e536b3ec26 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -479,6 +479,16 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t pnv_xive_presenter_get_config(XivePresenter *xptr) +{ + uint32_t cfg = 0; + + /* TIMA GEN1 is all P9 knows */ + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + + return cfg; +} + static uint8_t pnv_xive_get_block_id(XiveRouter *xrtr) { return pnv_xive_block_id(PNV_XIVE(xrtr)); @@ -1991,6 +2001,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data) xnc->notify = pnv_xive_notify; xpc->match_nvt = pnv_xive_match_nvt; + xpc->get_config = pnv_xive_presenter_get_config; }; static const TypeInfo pnv_xive_info = { diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index ec1edeb385..ed438a20ed 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -501,6 +501,17 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr) +{ + PnvXive2 *xive = PNV_XIVE2(xptr); + uint32_t cfg = 0; + + if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS) { + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + } + return cfg; +} + static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr) { return pnv_xive2_block_id(PNV_XIVE2(xrtr)); @@ -1645,17 +1656,6 @@ static const MemoryRegionOps pnv_xive2_ic_tm_indirect_ops = { /* * TIMA ops */ - -/* - * Special TIMA offsets to handle accesses in a POWER10 way. - * - * Only the CAM line updates done by the hypervisor should be handled - * specifically. - */ -#define HV_PAGE_OFFSET (XIVE_TM_HV_PAGE << TM_SHIFT) -#define HV_PUSH_OS_CTX_OFFSET (HV_PAGE_OFFSET | (TM_QW1_OS + TM_WORD2)) -#define HV_PULL_OS_CTX_OFFSET (HV_PAGE_OFFSET | TM_SPC_PULL_OS_CTX) - static void pnv_xive2_tm_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -1663,18 +1663,7 @@ static void pnv_xive2_tm_write(void *opaque, hwaddr offset, PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu); XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); XivePresenter *xptr = XIVE_PRESENTER(xive); - bool gen1_tima_os = - xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; - - offset &= TM_ADDRESS_MASK; - - /* TODO: should we switch the TM ops table instead ? */ - if (!gen1_tima_os && offset == HV_PUSH_OS_CTX_OFFSET) { - xive2_tm_push_os_ctx(xptr, tctx, offset, value, size); - return; - } - /* Other TM ops are the same as XIVE1 */ xive_tctx_tm_write(xptr, tctx, offset, value, size); } @@ -1684,17 +1673,7 @@ static uint64_t pnv_xive2_tm_read(void *opaque, hwaddr offset, unsigned size) PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu); XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); XivePresenter *xptr = XIVE_PRESENTER(xive); - bool gen1_tima_os = - xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; - - offset &= TM_ADDRESS_MASK; - - /* TODO: should we switch the TM ops table instead ? 
*/ - if (!gen1_tima_os && offset == HV_PULL_OS_CTX_OFFSET) { - return xive2_tm_pull_os_ctx(xptr, tctx, offset, size); - } - /* Other TM ops are the same as XIVE1 */ return xive_tctx_tm_read(xptr, tctx, offset, size); } @@ -1987,6 +1966,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, void *data) xnc->notify = pnv_xive2_notify; xpc->match_nvt = pnv_xive2_match_nvt; + xpc->get_config = pnv_xive2_presenter_get_config; }; static const TypeInfo pnv_xive2_info = { diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index dc641cc604..8bcab2846c 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -475,6 +475,21 @@ static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t spapr_xive_presenter_get_config(XivePresenter *xptr) +{ + uint32_t cfg = 0; + + /* + * Let's claim GEN1 TIMA format. If running with KVM on P10, the + * correct answer is deep in the hardware and not accessible to + * us. But it shouldn't matter as it only affects the presenter + * as seen by a guest OS. + */ + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + + return cfg; +} + static uint8_t spapr_xive_get_block_id(XiveRouter *xrtr) { return SPAPR_XIVE_BLOCK_ID; @@ -832,6 +847,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data) sicc->post_load = spapr_xive_post_load; xpc->match_nvt = spapr_xive_match_nvt; + xpc->get_config = spapr_xive_presenter_get_config; xpc->in_kernel = spapr_xive_in_kernel_xptr; } diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 5204c14b87..84c079b034 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -20,6 +20,7 @@ #include "monitor/monitor.h" #include "hw/irq.h" #include "hw/ppc/xive.h" +#include "hw/ppc/xive2.h" #include "hw/ppc/xive_regs.h" #include "trace.h" @@ -461,6 +462,13 @@ static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, } } +static uint32_t xive_presenter_get_config(XivePresenter *xptr) +{ + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); + + return xpc->get_config(xptr); +} + /* * Define a mapping of "special" operations depending on the TIMA page * offset and the size of the operation. 
@@ -497,14 +505,47 @@ static const XiveTmOp xive_tm_operations[] = { { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx }, }; -static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write) +static const XiveTmOp xive2_tm_operations[] = { + /* + * MMIOs below 2K : raw values and special operations without side + * effects + */ + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll }, + + /* MMIOs above 2K : special operations with side effects */ + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, xive_tm_ack_hv_reg }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, xive_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx }, +}; + +static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset, + unsigned size, bool write) { uint8_t page_offset = (offset >> TM_SHIFT) & 0x3; uint32_t op_offset = offset & TM_ADDRESS_MASK; - int i; + const XiveTmOp *tm_ops; + int i, tm_ops_count; + uint32_t cfg; + + cfg = xive_presenter_get_config(xptr); + if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) { + tm_ops = xive_tm_operations; + tm_ops_count = ARRAY_SIZE(xive_tm_operations); + } else { + tm_ops = xive2_tm_operations; + tm_ops_count = ARRAY_SIZE(xive2_tm_operations); + } - for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) { - const XiveTmOp *xto = &xive_tm_operations[i]; + for (i = 0; i < tm_ops_count; i++) { + const XiveTmOp *xto = &tm_ops[i]; /* Accesses done from a more privileged TIMA page is allowed */ if (xto->page_offset >= page_offset && @@ -535,7 +576,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(offset, size, true); + xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA " "@%"HWADDR_PRIx"\n", offset); @@ -548,7 +589,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. */ - xto = xive_tm_find_op(offset, size, true); + xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (xto) { xto->write_handler(xptr, tctx, offset, value, size); return; @@ -574,7 +615,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(offset, size, false); + xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA" "@%"HWADDR_PRIx"\n", offset); @@ -587,7 +628,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. 
*/ - xto = xive_tm_find_op(offset, size, false); + xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (xto) { ret = xto->read_handler(xptr, tctx, offset, size); goto out; diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 542f9e2932..6232cbeee1 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -133,13 +133,13 @@ static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off, PCIDevice *pdev; if (size != 4) { - phb_error(phb, "rc_config_write invalid size %d\n", size); + phb_error(phb, "rc_config_write invalid size %d", size); return; } pdev = pci_find_device(pci->bus, 0, 0); if (!pdev) { - phb_error(phb, "rc_config_write device not found\n"); + phb_error(phb, "rc_config_write device not found"); return; } @@ -155,13 +155,13 @@ static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off, uint64_t val; if (size != 4) { - phb_error(phb, "rc_config_read invalid size %d\n", size); + phb_error(phb, "rc_config_read invalid size %d", size); return ~0ull; } pdev = pci_find_device(pci->bus, 0, 0); if (!pdev) { - phb_error(phb, "rc_config_read device not found\n"); + phb_error(phb, "rc_config_read device not found"); return ~0ull; } @@ -1039,19 +1039,19 @@ static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr, if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & (PEC_NEST_STK_BAR_EN_MMIO0 | PEC_NEST_STK_BAR_EN_MMIO1)) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xffffffffff000000ull; break; case PEC_NEST_STK_PHB_REGS_BAR: if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xffffffffffc00000ull; break; case PEC_NEST_STK_INT_BAR: if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xfffffff000000000ull; break; diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index c927337da0..a313d4b964 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -15,6 +15,7 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_vio.c', 'spapr_events.c', 'spapr_hcall.c', + 'spapr_nested.c', 'spapr_iommu.c', 'spapr_rtas.c', 'spapr_pci.c', diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 590fc64b32..fc083173f3 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -799,7 +799,8 @@ static void pnv_init(MachineState *machine) DeviceState *dev; if (kvm_enabled()) { - error_report("The powernv machine does not work with KVM acceleration"); + error_report("machine %s does not support the KVM accelerator", + mc->name); exit(EXIT_FAILURE); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 1b1220c423..82e4408c5c 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -1436,6 +1436,12 @@ int ppc_cpu_pir(PowerPCCPU *cpu) return env->spr_cb[SPR_PIR].default_value; } +int ppc_cpu_tir(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + return env->spr_cb[SPR_TIR].default_value; +} + PowerPCCPU *ppc_get_vcpu_by_pir(int pir) { CPUState *cs; diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index f969fa3c29..f061b8cf3b 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -19,7 +19,6 @@ #include "hw/pci/pci.h" #include "hw/boards.h" #include "sysemu/kvm.h" -#include "kvm_ppc.h" #include "sysemu/device_tree.h" #include "hw/loader.h" 
#include "elf.h" @@ -97,16 +96,6 @@ static int bamboo_load_device_tree(MachineState *machine, fprintf(stderr, "couldn't set /chosen/bootargs\n"); } - /* - * Copy data from the host device tree into the guest. Since the guest can - * directly access the timebase without host involvement, we must expose - * the correct frequencies. - */ - if (kvm_enabled()) { - tb_freq = kvmppc_get_tbfreq(); - clock_freq = kvmppc_get_clockfreq(); - } - qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency", clock_freq); qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency", @@ -175,6 +164,12 @@ static void bamboo_init(MachineState *machine) int success; int i; + if (kvm_enabled()) { + error_report("machine %s does not support the KVM accelerator", + MACHINE_GET_CLASS(machine)->name); + exit(EXIT_FAILURE); + } + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 33bf232f8b..d9231c7317 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -45,7 +45,6 @@ #include "trace.h" #include "elf.h" #include "qemu/units.h" -#include "kvm_ppc.h" /* SMP is not enabled, for now */ #define MAX_CPUS 1 @@ -245,6 +244,12 @@ static void ibm_40p_init(MachineState *machine) long kernel_size = 0, initrd_size = 0; char boot_device; + if (kvm_enabled()) { + error_report("machine %s does not support the KVM accelerator", + MACHINE_GET_CLASS(machine)->name); + exit(EXIT_FAILURE); + } + /* init CPU */ cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; @@ -392,18 +397,7 @@ static void ibm_40p_init(MachineState *machine) fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); - if (kvm_enabled()) { - uint8_t *hypercall; - - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq()); - hypercall = g_malloc(16); - kvmppc_get_hypercall(env, hypercall, 16); - fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); - } else { - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); - } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index dcb7f1c70a..54dbfd7fe9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -61,6 +61,7 @@ #include "hw/ppc/fdt.h" #include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_nested.h" #include "hw/ppc/spapr_vio.h" #include "hw/ppc/vof.h" #include "hw/qdev-properties.h" @@ -2524,10 +2525,19 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) int ret; unsigned int smp_threads = ms->smp.threads; - if (!kvm_enabled() && (smp_threads > 1)) { - error_setg(errp, "TCG cannot support more than 1 thread/core " - "on a pseries machine"); - return; + if (tcg_enabled()) { + if (smp_threads > 1 && + !ppc_type_check_compat(ms->cpu_type, CPU_POWERPC_LOGICAL_2_07, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "TCG only supports SMT on POWER8 or newer CPUs"); + return; + } + + if (smp_threads > 8) { + error_setg(errp, "TCG cannot support more than 8 threads/core " + "on a pseries machine"); + return; + } } if (!is_power_of_2(smp_threads)) { error_setg(errp, "Cannot support %d threads/core on a pseries " diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 3fd45a6dec..5a0755d34f 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -473,6 
+473,20 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); } + } else if (tcg_enabled()) { + MachineState *ms = MACHINE(spapr); + unsigned int smp_threads = ms->smp.threads; + + /* + * Nested-HV vCPU env state to L2, so SMT-shared SPR updates, for + * example, do not necessarily update the correct SPR value on sibling + * threads that are in a different guest/host context. + */ + if (smp_threads > 1) { + error_setg(errp, "TCG does not support nested-HV with SMT"); + error_append_hint(errp, "Try appending -machine cap-nested-hv=off " + "or use threads=1 with -smp\n"); + } } } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9b88dd549a..a4e3c2fadd 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -255,7 +255,7 @@ static void spapr_cpu_core_unrealize(DeviceState *dev) } static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, - SpaprCpuCore *sc, Error **errp) + SpaprCpuCore *sc, int thread_index, Error **errp) { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -267,6 +267,9 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); + env->spr_cb[SPR_PIR].default_value = cs->cpu_index; + env->spr_cb[SPR_TIR].default_value = thread_index; + /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -337,7 +340,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) for (i = 0; i < cc->nr_threads; i++) { sc->threads[i] = spapr_create_vcpu(sc, i, errp); if (!sc->threads[i] || - !spapr_realize_vcpu(sc->threads[i], spapr, sc, errp)) { + !spapr_realize_vcpu(sc->threads[i], spapr, sc, i, errp)) { spapr_cpu_core_unrealize(dev); return; } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index b904755575..002ea0b7c1 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -13,6 +13,7 @@ #include "hw/ppc/ppc.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_nested.h" #include "mmu-hash64.h" #include "cpu-models.h" #include "trace.h" @@ -1498,349 +1499,17 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, } #ifdef CONFIG_TCG -#define PRTS_MASK 0x1f - -static target_ulong h_set_ptbl(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - target_ulong ptcr = args[0]; - - if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { - return H_FUNCTION; - } - - if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { - return H_PARAMETER; - } - - spapr->nested_ptcr = ptcr; /* Save new partition table */ - - return H_SUCCESS; -} - -static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - /* - * The spapr virtual hypervisor nested HV implementation retains no L2 - * translation state except for TLB. And the TLB is always invalidated - * across L1<->L2 transitions, so nothing is required here. - */ - - return H_SUCCESS; -} - -static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - /* - * This HCALL is not required, L1 KVM will take a slow path and walk the - * page tables manually to do the data copy. - */ - return H_FUNCTION; -} - -/* - * When this handler returns, the environment is switched to the L2 guest - * and TCG begins running that. 
spapr_exit_nested() performs the switch from - * L2 back to L1 and returns from the H_ENTER_NESTED hcall. - */ -static target_ulong h_enter_nested(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); - target_ulong hv_ptr = args[0]; - target_ulong regs_ptr = args[1]; - target_ulong hdec, now = cpu_ppc_load_tbl(env); - target_ulong lpcr, lpcr_mask; - struct kvmppc_hv_guest_state *hvstate; - struct kvmppc_hv_guest_state hv_state; - struct kvmppc_pt_regs *regs; - hwaddr len; - - if (spapr->nested_ptcr == 0) { - return H_NOT_AVAILABLE; - } - - len = sizeof(*hvstate); - hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false, - MEMTXATTRS_UNSPECIFIED); - if (len != sizeof(*hvstate)) { - address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false); - return H_PARAMETER; - } - - memcpy(&hv_state, hvstate, len); - - address_space_unmap(CPU(cpu)->as, hvstate, len, len, false); - - /* - * We accept versions 1 and 2. Version 2 fields are unused because TCG - * does not implement DAWR*. - */ - if (hv_state.version > HV_GUEST_STATE_VERSION) { - return H_PARAMETER; - } - - spapr_cpu->nested_host_state = g_try_new(CPUPPCState, 1); - if (!spapr_cpu->nested_host_state) { - return H_NO_MEM; - } - - memcpy(spapr_cpu->nested_host_state, env, sizeof(CPUPPCState)); - - len = sizeof(*regs); - regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, - MEMTXATTRS_UNSPECIFIED); - if (!regs || len != sizeof(*regs)) { - address_space_unmap(CPU(cpu)->as, regs, len, 0, false); - g_free(spapr_cpu->nested_host_state); - return H_P2; - } - - len = sizeof(env->gpr); - assert(len == sizeof(regs->gpr)); - memcpy(env->gpr, regs->gpr, len); - - env->lr = regs->link; - env->ctr = regs->ctr; - cpu_write_xer(env, regs->xer); - ppc_set_cr(env, regs->ccr); - - env->msr = regs->msr; - env->nip = regs->nip; - - address_space_unmap(CPU(cpu)->as, regs, len, len, false); - - env->cfar = hv_state.cfar; - - assert(env->spr[SPR_LPIDR] == 0); - env->spr[SPR_LPIDR] = hv_state.lpid; - - lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; - lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask); - lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; - lpcr &= ~LPCR_LPES0; - env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask; - - env->spr[SPR_PCR] = hv_state.pcr; - /* hv_state.amor is not used */ - env->spr[SPR_DPDES] = hv_state.dpdes; - env->spr[SPR_HFSCR] = hv_state.hfscr; - hdec = hv_state.hdec_expiry - now; - spapr_cpu->nested_tb_offset = hv_state.tb_offset; - /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/ - env->spr[SPR_SRR0] = hv_state.srr0; - env->spr[SPR_SRR1] = hv_state.srr1; - env->spr[SPR_SPRG0] = hv_state.sprg[0]; - env->spr[SPR_SPRG1] = hv_state.sprg[1]; - env->spr[SPR_SPRG2] = hv_state.sprg[2]; - env->spr[SPR_SPRG3] = hv_state.sprg[3]; - env->spr[SPR_BOOKS_PID] = hv_state.pidr; - env->spr[SPR_PPR] = hv_state.ppr; - - cpu_ppc_hdecr_init(env); - cpu_ppc_store_hdecr(env, hdec); - - /* - * The hv_state.vcpu_token is not needed. It is used by the KVM - * implementation to remember which L2 vCPU last ran on which physical - * CPU so as to invalidate process scope translations if it is moved - * between physical CPUs. For now TLBs are always flushed on L1<->L2 - * transitions so this is not a problem. 
- * - * Could validate that the same vcpu_token does not attempt to run on - * different L1 vCPUs at the same time, but that would be a L1 KVM bug - * and it's not obviously worth a new data structure to do it. - */ - - env->tb_env->tb_offset += spapr_cpu->nested_tb_offset; - spapr_cpu->in_nested = true; - - hreg_compute_hflags(env); - ppc_maybe_interrupt(env); - tlb_flush(cs); - env->reserve_addr = -1; /* Reset the reservation */ - - /* - * The spapr hcall helper sets env->gpr[3] to the return value, but at - * this point the L1 is not returning from the hcall but rather we - * start running the L2, so r3 must not be clobbered, so return env->gpr[3] - * to leave it unchanged. - */ - return env->gpr[3]; -} - -void spapr_exit_nested(PowerPCCPU *cpu, int excp) -{ - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); - target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */ - target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; - target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5]; - struct kvmppc_hv_guest_state *hvstate; - struct kvmppc_pt_regs *regs; - hwaddr len; - - assert(spapr_cpu->in_nested); - - cpu_ppc_hdecr_exit(env); - - len = sizeof(*hvstate); - hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, - MEMTXATTRS_UNSPECIFIED); - if (len != sizeof(*hvstate)) { - address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); - r3_return = H_PARAMETER; - goto out_restore_l1; - } - - hvstate->cfar = env->cfar; - hvstate->lpcr = env->spr[SPR_LPCR]; - hvstate->pcr = env->spr[SPR_PCR]; - hvstate->dpdes = env->spr[SPR_DPDES]; - hvstate->hfscr = env->spr[SPR_HFSCR]; - - if (excp == POWERPC_EXCP_HDSI) { - hvstate->hdar = env->spr[SPR_HDAR]; - hvstate->hdsisr = env->spr[SPR_HDSISR]; - hvstate->asdr = env->spr[SPR_ASDR]; - } else if (excp == POWERPC_EXCP_HISI) { - hvstate->asdr = env->spr[SPR_ASDR]; - } - - /* HEIR should be implemented for HV mode and saved here. */ - hvstate->srr0 = env->spr[SPR_SRR0]; - hvstate->srr1 = env->spr[SPR_SRR1]; - hvstate->sprg[0] = env->spr[SPR_SPRG0]; - hvstate->sprg[1] = env->spr[SPR_SPRG1]; - hvstate->sprg[2] = env->spr[SPR_SPRG2]; - hvstate->sprg[3] = env->spr[SPR_SPRG3]; - hvstate->pidr = env->spr[SPR_BOOKS_PID]; - hvstate->ppr = env->spr[SPR_PPR]; - - /* Is it okay to specify write length larger than actual data written? */ - address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); - - len = sizeof(*regs); - regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, - MEMTXATTRS_UNSPECIFIED); - if (!regs || len != sizeof(*regs)) { - address_space_unmap(CPU(cpu)->as, regs, len, 0, true); - r3_return = H_P2; - goto out_restore_l1; - } - - len = sizeof(env->gpr); - assert(len == sizeof(regs->gpr)); - memcpy(regs->gpr, env->gpr, len); - - regs->link = env->lr; - regs->ctr = env->ctr; - regs->xer = cpu_read_xer(env); - regs->ccr = ppc_get_cr(env); - - if (excp == POWERPC_EXCP_MCHECK || - excp == POWERPC_EXCP_RESET || - excp == POWERPC_EXCP_SYSCALL) { - regs->nip = env->spr[SPR_SRR0]; - regs->msr = env->spr[SPR_SRR1] & env->msr_mask; - } else { - regs->nip = env->spr[SPR_HSRR0]; - regs->msr = env->spr[SPR_HSRR1] & env->msr_mask; - } - - /* Is it okay to specify write length larger than actual data written? 
*/ - address_space_unmap(CPU(cpu)->as, regs, len, len, true); - -out_restore_l1: - memcpy(env->gpr, spapr_cpu->nested_host_state->gpr, sizeof(env->gpr)); - env->lr = spapr_cpu->nested_host_state->lr; - env->ctr = spapr_cpu->nested_host_state->ctr; - memcpy(env->crf, spapr_cpu->nested_host_state->crf, sizeof(env->crf)); - env->cfar = spapr_cpu->nested_host_state->cfar; - env->xer = spapr_cpu->nested_host_state->xer; - env->so = spapr_cpu->nested_host_state->so; - env->ov = spapr_cpu->nested_host_state->ov; - env->ov32 = spapr_cpu->nested_host_state->ov32; - env->ca32 = spapr_cpu->nested_host_state->ca32; - env->msr = spapr_cpu->nested_host_state->msr; - env->nip = spapr_cpu->nested_host_state->nip; - - assert(env->spr[SPR_LPIDR] != 0); - env->spr[SPR_LPCR] = spapr_cpu->nested_host_state->spr[SPR_LPCR]; - env->spr[SPR_LPIDR] = spapr_cpu->nested_host_state->spr[SPR_LPIDR]; - env->spr[SPR_PCR] = spapr_cpu->nested_host_state->spr[SPR_PCR]; - env->spr[SPR_DPDES] = 0; - env->spr[SPR_HFSCR] = spapr_cpu->nested_host_state->spr[SPR_HFSCR]; - env->spr[SPR_SRR0] = spapr_cpu->nested_host_state->spr[SPR_SRR0]; - env->spr[SPR_SRR1] = spapr_cpu->nested_host_state->spr[SPR_SRR1]; - env->spr[SPR_SPRG0] = spapr_cpu->nested_host_state->spr[SPR_SPRG0]; - env->spr[SPR_SPRG1] = spapr_cpu->nested_host_state->spr[SPR_SPRG1]; - env->spr[SPR_SPRG2] = spapr_cpu->nested_host_state->spr[SPR_SPRG2]; - env->spr[SPR_SPRG3] = spapr_cpu->nested_host_state->spr[SPR_SPRG3]; - env->spr[SPR_BOOKS_PID] = spapr_cpu->nested_host_state->spr[SPR_BOOKS_PID]; - env->spr[SPR_PPR] = spapr_cpu->nested_host_state->spr[SPR_PPR]; - - /* - * Return the interrupt vector address from H_ENTER_NESTED to the L1 - * (or error code). - */ - env->gpr[3] = r3_return; - - env->tb_env->tb_offset -= spapr_cpu->nested_tb_offset; - spapr_cpu->in_nested = false; - - hreg_compute_hflags(env); - ppc_maybe_interrupt(env); - tlb_flush(cs); - env->reserve_addr = -1; /* Reset the reservation */ - - g_free(spapr_cpu->nested_host_state); - spapr_cpu->nested_host_state = NULL; -} - -static void hypercall_register_nested(void) -{ - spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); - spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested); - spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate); - spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest); -} - static void hypercall_register_softmmu(void) { /* DO NOTHING */ } #else -void spapr_exit_nested(PowerPCCPU *cpu, int excp) -{ - g_assert_not_reached(); -} - static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) { g_assert_not_reached(); } -static void hypercall_register_nested(void) -{ - /* DO NOTHING */ -} - static void hypercall_register_softmmu(void) { /* hcall-pft */ @@ -1910,7 +1579,7 @@ static void hypercall_register_types(void) spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); - hypercall_register_nested(); + spapr_register_nested(); } type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c new file mode 100644 index 0000000000..121aa96ddc --- /dev/null +++ b/hw/ppc/spapr_nested.c @@ -0,0 +1,395 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "exec/exec-all.h" +#include "helper_regs.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_nested.h" + +#ifdef CONFIG_TCG +#define PRTS_MASK 0x1f + +static target_ulong h_set_ptbl(PowerPCCPU *cpu, + SpaprMachineState *spapr, 
+ target_ulong opcode, + target_ulong *args) +{ + target_ulong ptcr = args[0]; + + if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { + return H_FUNCTION; + } + + if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { + return H_PARAMETER; + } + + spapr->nested_ptcr = ptcr; /* Save new partition table */ + + return H_SUCCESS; +} + +static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * The spapr virtual hypervisor nested HV implementation retains no L2 + * translation state except for TLB. And the TLB is always invalidated + * across L1<->L2 transitions, so nothing is required here. + */ + + return H_SUCCESS; +} + +static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * This HCALL is not required, L1 KVM will take a slow path and walk the + * page tables manually to do the data copy. + */ + return H_FUNCTION; +} + +static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + memcpy(save->gpr, env->gpr, sizeof(save->gpr)); + + save->lr = env->lr; + save->ctr = env->ctr; + save->cfar = env->cfar; + save->msr = env->msr; + save->nip = env->nip; + + save->cr = ppc_get_cr(env); + save->xer = cpu_read_xer(env); + + save->lpcr = env->spr[SPR_LPCR]; + save->lpidr = env->spr[SPR_LPIDR]; + save->pcr = env->spr[SPR_PCR]; + save->dpdes = env->spr[SPR_DPDES]; + save->hfscr = env->spr[SPR_HFSCR]; + save->srr0 = env->spr[SPR_SRR0]; + save->srr1 = env->spr[SPR_SRR1]; + save->sprg0 = env->spr[SPR_SPRG0]; + save->sprg1 = env->spr[SPR_SPRG1]; + save->sprg2 = env->spr[SPR_SPRG2]; + save->sprg3 = env->spr[SPR_SPRG3]; + save->pidr = env->spr[SPR_BOOKS_PID]; + save->ppr = env->spr[SPR_PPR]; + + save->tb_offset = env->tb_env->tb_offset; +} + +static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + memcpy(env->gpr, load->gpr, sizeof(env->gpr)); + + env->lr = load->lr; + env->ctr = load->ctr; + env->cfar = load->cfar; + env->msr = load->msr; + env->nip = load->nip; + + ppc_set_cr(env, load->cr); + cpu_write_xer(env, load->xer); + + env->spr[SPR_LPCR] = load->lpcr; + env->spr[SPR_LPIDR] = load->lpidr; + env->spr[SPR_PCR] = load->pcr; + env->spr[SPR_DPDES] = load->dpdes; + env->spr[SPR_HFSCR] = load->hfscr; + env->spr[SPR_SRR0] = load->srr0; + env->spr[SPR_SRR1] = load->srr1; + env->spr[SPR_SPRG0] = load->sprg0; + env->spr[SPR_SPRG1] = load->sprg1; + env->spr[SPR_SPRG2] = load->sprg2; + env->spr[SPR_SPRG3] = load->sprg3; + env->spr[SPR_BOOKS_PID] = load->pidr; + env->spr[SPR_PPR] = load->ppr; + + env->tb_env->tb_offset = load->tb_offset; + + /* + * MSR updated, compute hflags and possible interrupts. + */ + hreg_compute_hflags(env); + ppc_maybe_interrupt(env); + + /* + * Nested HV does not tag TLB entries between L1 and L2, so must + * flush on transition. + */ + tlb_flush(cs); + env->reserve_addr = -1; /* Reset the reservation */ +} + +/* + * When this handler returns, the environment is switched to the L2 guest + * and TCG begins running that. spapr_exit_nested() performs the switch from + * L2 back to L1 and returns from the H_ENTER_NESTED hcall. 
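+ *
+ * Editor's aside: a sketch of the round trip, using only the helpers
+ * defined in this file (an illustration, not part of the patch):
+ *
+ *   h_enter_nested():
+ *       nested_save_state(spapr_cpu->nested_host_state, cpu);   (L1 saved)
+ *       nested_load_state(cpu, &l2_state);                      (L2 loaded)
+ *       return env->gpr[3];       (TCG resumes, now executing the L2)
+ *
+ *   ... the L2 runs until an interrupt or hcall reaches the vhyp ...
+ *
+ *   spapr_exit_nested(cpu, excp):
+ *       nested_save_state(&l2_state, cpu);                      (L2 saved)
+ *       nested_load_state(cpu, spapr_cpu->nested_host_state);   (L1 restored)
+ *       env->gpr[3] = env->excp_vectors[excp];    (H_ENTER_NESTED returns)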
+ */
+static target_ulong h_enter_nested(PowerPCCPU *cpu,
+                                   SpaprMachineState *spapr,
+                                   target_ulong opcode,
+                                   target_ulong *args)
+{
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    CPUPPCState *env = &cpu->env;
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    struct nested_ppc_state l2_state;
+    target_ulong hv_ptr = args[0];
+    target_ulong regs_ptr = args[1];
+    target_ulong hdec, now = cpu_ppc_load_tbl(env);
+    target_ulong lpcr, lpcr_mask;
+    struct kvmppc_hv_guest_state *hvstate;
+    struct kvmppc_hv_guest_state hv_state;
+    struct kvmppc_pt_regs *regs;
+    hwaddr len;
+
+    if (spapr->nested_ptcr == 0) {
+        return H_NOT_AVAILABLE;
+    }
+
+    len = sizeof(*hvstate);
+    hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
+                                MEMTXATTRS_UNSPECIFIED);
+    if (len != sizeof(*hvstate)) {
+        address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
+        return H_PARAMETER;
+    }
+
+    memcpy(&hv_state, hvstate, len);
+
+    address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
+
+    /*
+     * We accept versions 1 and 2. Version 2 fields are unused because TCG
+     * does not implement DAWR*.
+     */
+    if (hv_state.version > HV_GUEST_STATE_VERSION) {
+        return H_PARAMETER;
+    }
+
+    if (hv_state.lpid == 0) {
+        return H_PARAMETER;
+    }
+
+    spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
+    if (!spapr_cpu->nested_host_state) {
+        return H_NO_MEM;
+    }
+
+    assert(env->spr[SPR_LPIDR] == 0);
+    assert(env->spr[SPR_DPDES] == 0);
+    nested_save_state(spapr_cpu->nested_host_state, cpu);
+
+    len = sizeof(*regs);
+    regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false,
+                             MEMTXATTRS_UNSPECIFIED);
+    if (!regs || len != sizeof(*regs)) {
+        address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
+        g_free(spapr_cpu->nested_host_state);
+        return H_P2;
+    }
+
+    len = sizeof(l2_state.gpr);
+    assert(len == sizeof(regs->gpr));
+    memcpy(l2_state.gpr, regs->gpr, len);
+
+    l2_state.lr = regs->link;
+    l2_state.ctr = regs->ctr;
+    l2_state.xer = regs->xer;
+    l2_state.cr = regs->ccr;
+    l2_state.msr = regs->msr;
+    l2_state.nip = regs->nip;
+
+    address_space_unmap(CPU(cpu)->as, regs, len, len, false);
+
+    l2_state.cfar = hv_state.cfar;
+    l2_state.lpidr = hv_state.lpid;
+
+    lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
+    lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask);
+    lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
+    lpcr &= ~LPCR_LPES0;
+    l2_state.lpcr = lpcr & pcc->lpcr_mask;
+
+    l2_state.pcr = hv_state.pcr;
+    /* hv_state.amor is not used */
+    l2_state.dpdes = hv_state.dpdes;
+    l2_state.hfscr = hv_state.hfscr;
+    /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs */
+    l2_state.srr0 = hv_state.srr0;
+    l2_state.srr1 = hv_state.srr1;
+    l2_state.sprg0 = hv_state.sprg[0];
+    l2_state.sprg1 = hv_state.sprg[1];
+    l2_state.sprg2 = hv_state.sprg[2];
+    l2_state.sprg3 = hv_state.sprg[3];
+    l2_state.pidr = hv_state.pidr;
+    l2_state.ppr = hv_state.ppr;
+    l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset;
+
+    /*
+     * Switch to the nested guest environment and start the "hdec" timer.
+     */
+    nested_load_state(cpu, &l2_state);
+
+    hdec = hv_state.hdec_expiry - now;
+    cpu_ppc_hdecr_init(env);
+    cpu_ppc_store_hdecr(env, hdec);
+
+    /*
+     * The hv_state.vcpu_token is not needed. It is used by the KVM
+     * implementation to remember which L2 vCPU last ran on which physical
+     * CPU so as to invalidate process scope translations if it is moved
+     * between physical CPUs. For now TLBs are always flushed on L1<->L2
+     * transitions so this is not a problem.
+     *
+     * Could validate that the same vcpu_token does not attempt to run on
+     * different L1 vCPUs at the same time, but that would be an L1 KVM bug
+     * and it's not obviously worth a new data structure to do it.
+     */
+
+    spapr_cpu->in_nested = true;
+
+    /*
+     * The spapr hcall helper sets env->gpr[3] to the return value, but at
+     * this point the L1 is not returning from the hcall but rather we
+     * start running the L2, so r3 must not be clobbered; return env->gpr[3]
+     * to leave it unchanged.
+     */
+    return env->gpr[3];
+}
+
+void spapr_exit_nested(PowerPCCPU *cpu, int excp)
+{
+    CPUPPCState *env = &cpu->env;
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    struct nested_ppc_state l2_state;
+    target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4];
+    target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5];
+    target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr;
+    struct kvmppc_hv_guest_state *hvstate;
+    struct kvmppc_pt_regs *regs;
+    hwaddr len;
+
+    assert(spapr_cpu->in_nested);
+
+    nested_save_state(&l2_state, cpu);
+    hsrr0 = env->spr[SPR_HSRR0];
+    hsrr1 = env->spr[SPR_HSRR1];
+    hdar = env->spr[SPR_HDAR];
+    hdsisr = env->spr[SPR_HDSISR];
+    asdr = env->spr[SPR_ASDR];
+
+    /*
+     * Switch back to the host environment (including for any error).
+     */
+    assert(env->spr[SPR_LPIDR] != 0);
+    nested_load_state(cpu, spapr_cpu->nested_host_state);
+    env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */
+
+    cpu_ppc_hdecr_exit(env);
+
+    spapr_cpu->in_nested = false;
+
+    g_free(spapr_cpu->nested_host_state);
+    spapr_cpu->nested_host_state = NULL;
+
+    len = sizeof(*hvstate);
+    hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
+                                MEMTXATTRS_UNSPECIFIED);
+    if (len != sizeof(*hvstate)) {
+        address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
+        env->gpr[3] = H_PARAMETER;
+        return;
+    }
+
+    hvstate->cfar = l2_state.cfar;
+    hvstate->lpcr = l2_state.lpcr;
+    hvstate->pcr = l2_state.pcr;
+    hvstate->dpdes = l2_state.dpdes;
+    hvstate->hfscr = l2_state.hfscr;
+
+    if (excp == POWERPC_EXCP_HDSI) {
+        hvstate->hdar = hdar;
+        hvstate->hdsisr = hdsisr;
+        hvstate->asdr = asdr;
+    } else if (excp == POWERPC_EXCP_HISI) {
+        hvstate->asdr = asdr;
+    }
+
+    /* HEIR should be implemented for HV mode and saved here. */
+    hvstate->srr0 = l2_state.srr0;
+    hvstate->srr1 = l2_state.srr1;
+    hvstate->sprg[0] = l2_state.sprg0;
+    hvstate->sprg[1] = l2_state.sprg1;
+    hvstate->sprg[2] = l2_state.sprg2;
+    hvstate->sprg[3] = l2_state.sprg3;
+    hvstate->pidr = l2_state.pidr;
+    hvstate->ppr = l2_state.ppr;
+
+    /* Is it okay to specify write length larger than actual data written? */
+    address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);
+
+    len = sizeof(*regs);
+    regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
+                             MEMTXATTRS_UNSPECIFIED);
+    if (!regs || len != sizeof(*regs)) {
+        address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
+        env->gpr[3] = H_P2;
+        return;
+    }
+
+    len = sizeof(env->gpr);
+    assert(len == sizeof(regs->gpr));
+    memcpy(regs->gpr, l2_state.gpr, len);
+
+    regs->link = l2_state.lr;
+    regs->ctr = l2_state.ctr;
+    regs->xer = l2_state.xer;
+    regs->ccr = l2_state.cr;
+
+    if (excp == POWERPC_EXCP_MCHECK ||
+        excp == POWERPC_EXCP_RESET ||
+        excp == POWERPC_EXCP_SYSCALL) {
+        regs->nip = l2_state.srr0;
+        regs->msr = l2_state.srr1 & env->msr_mask;
+    } else {
+        regs->nip = hsrr0;
+        regs->msr = hsrr1 & env->msr_mask;
+    }
+
+    /* Is it okay to specify write length larger than actual data written? */
+    address_space_unmap(CPU(cpu)->as, regs, len, len, true);
+}
+
+void spapr_register_nested(void)
+{
+    spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
+    spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
+    spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
+    spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
+}
+#else
+void spapr_exit_nested(PowerPCCPU *cpu, int excp)
+{
+    g_assert_not_reached();
+}
+
+void spapr_register_nested(void)
+{
+    /* DO NOTHING */
+}
+#endif
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index 1c7786b52c..2052d721e5 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -22,7 +22,6 @@
 #include "qom/object.h"
 #include "qemu/event_notifier.h"
 #include "sysemu/kvm.h"
-#include "util/event_notifier-posix.c"
 
 static void probe_pci_info(PCIDevice *dev, Error **errp);
 static void proxy_device_reset(DeviceState *dev);
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index 2c5546560a..81f7e53aed 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -354,6 +354,8 @@ static void spike_machine_class_init(ObjectClass *oc, void *data)
     mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
     mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
     mc->numa_mem_supported = true;
+    /* platform instead of architectural choice */
+    mc->cpu_cluster_has_numa_boundary = true;
     mc->default_ram_id = "riscv.spike.ram";
     object_class_property_add_str(oc, "signature", NULL, spike_set_signature);
     object_class_property_set_description(oc, "signature",
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 95708d890e..ed4c27487e 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1669,6 +1669,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
     mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
     mc->numa_mem_supported = true;
+    /* platform instead of architectural choice */
+    mc->cpu_cluster_has_numa_boundary = true;
     mc->default_ram_id = "riscv_virt_board.ram";
     assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h
index 9fb98bc1ef..d04170aa27 100644
--- a/include/exec/memattrs.h
+++ b/include/exec/memattrs.h
@@ -29,10 +29,17 @@ typedef struct MemTxAttrs {
      * "didn't specify" if necessary.
      */
     unsigned int unspecified:1;
-    /* ARM/AMBA: TrustZone Secure access
+    /*
+     * ARM/AMBA: TrustZone Secure access
      * x86: System Management Mode access
      */
     unsigned int secure:1;
+    /*
+     * ARM: ArmSecuritySpace. This partially overlaps secure, but it is
+     * easier to have both fields to assist code that does not understand
+     * ARMv9 RME, or has no specific knowledge of ARM at all (e.g. pflash).
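+     *
+     * Editor's summary of the intended correspondence for ARM, taken
+     * from ARMSecuritySpace and arm_space_is_secure() in this patch:
+     *
+     *   space 0 (Secure)    -> secure 1
+     *   space 1 (NonSecure) -> secure 0
+     *   space 2 (Root)      -> secure 1
+     *   space 3 (Realm)     -> secure 0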
+ */ + unsigned int space:2; /* Memory access is usermode (unprivileged) */ unsigned int user:1; /* diff --git a/include/hw/boards.h b/include/hw/boards.h index a385010909..6b267c21ce 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -274,6 +274,7 @@ struct MachineClass { bool nvdimm_supported; bool numa_mem_supported; bool auto_enable_numa; + bool cpu_cluster_has_numa_boundary; SMPCompatProps smp_props; const char *default_ram_id; diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 4871ad85f0..3b765beb9b 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -402,6 +402,7 @@ struct CPUState { struct kvm_dirty_gfn *kvm_dirty_gfns; uint32_t kvm_fetch_index; uint64_t dirty_pages; + int kvm_vcpu_stats_fd; /* Use by accel-block: CPU is executing an ioctl() */ QemuLockCnt in_ioctl_lock; diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h index 02af03ada2..e095c002dc 100644 --- a/include/hw/ppc/ppc.h +++ b/include/hw/ppc/ppc.h @@ -6,6 +6,7 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level); PowerPCCPU *ppc_get_vcpu_by_pir(int pir); int ppc_cpu_pir(PowerPCCPU *cpu); +int ppc_cpu_tir(PowerPCCPU *cpu); /* PowerPC hardware exceptions management helpers */ typedef void (*clk_setup_cb)(void *opaque, uint32_t freq); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index bd5a6c4780..538b2dfb89 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -621,66 +621,6 @@ struct SpaprMachineState { #define SVM_H_TPM_COMM 0xEF10 #define SVM_HCALL_MAX SVM_H_TPM_COMM -/* - * Register state for entering a nested guest with H_ENTER_NESTED. - * New member must be added at the end. - */ -struct kvmppc_hv_guest_state { - uint64_t version; /* version of this structure layout, must be first */ - uint32_t lpid; - uint32_t vcpu_token; - /* These registers are hypervisor privileged (at least for writing) */ - uint64_t lpcr; - uint64_t pcr; - uint64_t amor; - uint64_t dpdes; - uint64_t hfscr; - int64_t tb_offset; - uint64_t dawr0; - uint64_t dawrx0; - uint64_t ciabr; - uint64_t hdec_expiry; - uint64_t purr; - uint64_t spurr; - uint64_t ic; - uint64_t vtb; - uint64_t hdar; - uint64_t hdsisr; - uint64_t heir; - uint64_t asdr; - /* These are OS privileged but need to be set late in guest entry */ - uint64_t srr0; - uint64_t srr1; - uint64_t sprg[4]; - uint64_t pidr; - uint64_t cfar; - uint64_t ppr; - /* Version 1 ends here */ - uint64_t dawr1; - uint64_t dawrx1; - /* Version 2 ends here */ -}; - -/* Latest version of hv_guest_state structure */ -#define HV_GUEST_STATE_VERSION 2 - -/* Linux 64-bit powerpc pt_regs struct, used by nested HV */ -struct kvmppc_pt_regs { - uint64_t gpr[32]; - uint64_t nip; - uint64_t msr; - uint64_t orig_gpr3; /* Used for restarting system calls */ - uint64_t ctr; - uint64_t link; - uint64_t xer; - uint64_t ccr; - uint64_t softe; /* Soft enabled/disabled */ - uint64_t trap; /* Reason for being here */ - uint64_t dar; /* Fault registers */ - uint64_t dsisr; /* on 4xx/Book-E used for ESR */ - uint64_t result; /* Result of a system call */ -}; - typedef struct SpaprDeviceTreeUpdateHeader { uint32_t version_id; } SpaprDeviceTreeUpdateHeader; @@ -698,8 +638,6 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args); -void spapr_exit_nested(PowerPCCPU *cpu, int excp); - target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong shift); target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, 
SpaprMachineState *spapr, diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index b560514560..69a52e39b8 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -41,6 +41,8 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, target_ulong r1, target_ulong r3, target_ulong r4); +struct nested_ppc_state; + typedef struct SpaprCpuState { uint64_t vpa_addr; uint64_t slb_shadow_addr, slb_shadow_size; @@ -51,8 +53,7 @@ typedef struct SpaprCpuState { /* Fields for nested-HV support */ bool in_nested; /* true while the L2 is executing */ - CPUPPCState *nested_host_state; /* holds the L1 state while L2 executes */ - int64_t nested_tb_offset; /* L1->L2 TB offset */ + struct nested_ppc_state *nested_host_state; /* holds the L1 state while L2 executes */ } SpaprCpuState; static inline SpaprCpuState *spapr_cpu_state(PowerPCCPU *cpu) diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h new file mode 100644 index 0000000000..d383486476 --- /dev/null +++ b/include/hw/ppc/spapr_nested.h @@ -0,0 +1,102 @@ +#ifndef HW_SPAPR_NESTED_H +#define HW_SPAPR_NESTED_H + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" + +/* + * Register state for entering a nested guest with H_ENTER_NESTED. + * New member must be added at the end. + */ +struct kvmppc_hv_guest_state { + uint64_t version; /* version of this structure layout, must be first */ + uint32_t lpid; + uint32_t vcpu_token; + /* These registers are hypervisor privileged (at least for writing) */ + uint64_t lpcr; + uint64_t pcr; + uint64_t amor; + uint64_t dpdes; + uint64_t hfscr; + int64_t tb_offset; + uint64_t dawr0; + uint64_t dawrx0; + uint64_t ciabr; + uint64_t hdec_expiry; + uint64_t purr; + uint64_t spurr; + uint64_t ic; + uint64_t vtb; + uint64_t hdar; + uint64_t hdsisr; + uint64_t heir; + uint64_t asdr; + /* These are OS privileged but need to be set late in guest entry */ + uint64_t srr0; + uint64_t srr1; + uint64_t sprg[4]; + uint64_t pidr; + uint64_t cfar; + uint64_t ppr; + /* Version 1 ends here */ + uint64_t dawr1; + uint64_t dawrx1; + /* Version 2 ends here */ +}; + +/* Latest version of hv_guest_state structure */ +#define HV_GUEST_STATE_VERSION 2 + +/* Linux 64-bit powerpc pt_regs struct, used by nested HV */ +struct kvmppc_pt_regs { + uint64_t gpr[32]; + uint64_t nip; + uint64_t msr; + uint64_t orig_gpr3; /* Used for restarting system calls */ + uint64_t ctr; + uint64_t link; + uint64_t xer; + uint64_t ccr; + uint64_t softe; /* Soft enabled/disabled */ + uint64_t trap; /* Reason for being here */ + uint64_t dar; /* Fault registers */ + uint64_t dsisr; /* on 4xx/Book-E used for ESR */ + uint64_t result; /* Result of a system call */ +}; + +/* + * nested_ppc_state is used to save the host CPU state before switching it to + * the guest CPU state, to be restored on H_ENTER_NESTED exit. 
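+ *
+ * Editor's sketch of the lifecycle, as implemented in
+ * hw/ppc/spapr_nested.c:
+ *
+ *   h_enter_nested():
+ *       spapr_cpu->nested_host_state =
+ *           g_try_new(struct nested_ppc_state, 1);
+ *       nested_save_state(spapr_cpu->nested_host_state, cpu);
+ *
+ *   spapr_exit_nested():
+ *       nested_load_state(cpu, spapr_cpu->nested_host_state);
+ *       g_free(spapr_cpu->nested_host_state);
+ *       spapr_cpu->nested_host_state = NULL;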
+ */ +struct nested_ppc_state { + uint64_t gpr[32]; + uint64_t lr; + uint64_t ctr; + uint64_t cfar; + uint64_t msr; + uint64_t nip; + uint32_t cr; + + uint64_t xer; + + uint64_t lpcr; + uint64_t lpidr; + uint64_t pidr; + uint64_t pcr; + uint64_t dpdes; + uint64_t hfscr; + uint64_t srr0; + uint64_t srr1; + uint64_t sprg0; + uint64_t sprg1; + uint64_t sprg2; + uint64_t sprg3; + uint64_t ppr; + + int64_t tb_offset; +}; + +void spapr_register_nested(void); +void spapr_exit_nested(PowerPCCPU *cpu, int excp); + +#endif /* HW_SPAPR_NESTED_H */ diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index f7eea4ca81..3dfb06e002 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -430,6 +430,8 @@ typedef struct XivePresenterClass XivePresenterClass; DECLARE_CLASS_CHECKERS(XivePresenterClass, XIVE_PRESENTER, TYPE_XIVE_PRESENTER) +#define XIVE_PRESENTER_GEN1_TIMA_OS 0x1 + struct XivePresenterClass { InterfaceClass parent; int (*match_nvt)(XivePresenter *xptr, uint8_t format, @@ -437,6 +439,7 @@ struct XivePresenterClass { bool cam_ignore, uint8_t priority, uint32_t logic_serv, XiveTCTXMatch *match); bool (*in_kernel)(const XivePresenter *xptr); + uint32_t (*get_config)(XivePresenter *xptr); }; int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index c2f49df1f9..a309f90c76 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -184,4 +184,17 @@ #define QEMU_DISABLE_CFI #endif +/* + * Apple clang version 14 has a bug in its __builtin_subcll(); define + * BUILTIN_SUBCLL_BROKEN for the offending versions so we can avoid it. + * When a version of Apple clang which has this bug fixed is released + * we can add an upper bound to this check. + * See https://gitlab.com/qemu-project/qemu/-/issues/1631 + * and https://gitlab.com/qemu-project/qemu/-/issues/1659 for details. + * The bug never made it into any upstream LLVM releases, only Apple ones. + */ +#if defined(__apple_build_version__) && __clang_major__ >= 14 +#define BUILTIN_SUBCLL_BROKEN +#endif + #endif /* COMPILER_H */ diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index d3b4dce6a9..011618373e 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -649,7 +649,7 @@ static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry) */ static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow) { -#if __has_builtin(__builtin_subcll) +#if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN) unsigned long long b = *pborrow; x = __builtin_subcll(x, y, b, &b); *pborrow = b & 1; diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c index 2d0918a93f..9eeda551ea 100644 --- a/linux-user/i386/cpu_loop.c +++ b/linux-user/i386/cpu_loop.c @@ -211,6 +211,9 @@ void cpu_loop(CPUX86State *env) switch(trapnr) { case 0x80: +#ifndef TARGET_X86_64 + case EXCP_SYSCALL: +#endif /* linux syscall from int $0x80 */ ret = do_syscall(env, env->regs[R_EAX], @@ -227,9 +230,9 @@ void cpu_loop(CPUX86State *env) env->regs[R_EAX] = ret; } break; -#ifndef TARGET_ABI32 +#ifdef TARGET_X86_64 case EXCP_SYSCALL: - /* linux syscall from syscall instruction */ + /* linux syscall from syscall instruction. 
*/ ret = do_syscall(env, env->regs[R_EAX], env->regs[R_EDI], @@ -245,8 +248,6 @@ void cpu_loop(CPUX86State *env) env->regs[R_EAX] = ret; } break; -#endif -#ifdef TARGET_X86_64 case EXCP_VSYSCALL: emulate_vsyscall(env); break; diff --git a/meson.build b/meson.build index 6ef78ea278..3e3d38badb 100644 --- a/meson.build +++ b/meson.build @@ -2232,6 +2232,8 @@ config_host_data.set('CONFIG_CLOCK_ADJTIME', cc.has_function('clock_adjtime')) config_host_data.set('CONFIG_DUP3', cc.has_function('dup3')) config_host_data.set('CONFIG_FALLOCATE', cc.has_function('fallocate')) config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate')) +config_host_data.set('CONFIG_GETCPU', cc.has_function('getcpu', prefix: gnu_source_prefix)) +config_host_data.set('CONFIG_SCHED_GETCPU', cc.has_function('sched_getcpu', prefix: '#include <sched.h>')) # Note that we need to specify prefix: here to avoid incorrectly # thinking that Windows has posix_memalign() config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign', prefix: '#include <stdlib.h>')) diff --git a/pc-bios/hppa-firmware.img b/pc-bios/hppa-firmware.img Binary files differindex e7660b0458..0fa3808f16 100644 --- a/pc-bios/hppa-firmware.img +++ b/pc-bios/hppa-firmware.img diff --git a/pc-bios/keymaps/meson.build b/pc-bios/keymaps/meson.build index bff3083313..0bd8ce0077 100644 --- a/pc-bios/keymaps/meson.build +++ b/pc-bios/keymaps/meson.build @@ -1,5 +1,5 @@ keymaps = { - 'ar': '-l ar', + 'ar': '-l ara', 'bepo': '-l fr -v dvorak', 'cz': '-l cz', 'da': '-l dk', diff --git a/roms/seabios-hppa b/roms/seabios-hppa -Subproject 1cfbe76ff625fce9ed5991f7e13d80ffec900f4 +Subproject 673d2595d4f773cc266cbf8dbaf2f475a6adb94 diff --git a/scripts/git-submodule.sh b/scripts/git-submodule.sh index 11fad2137c..335f7f5fdf 100755 --- a/scripts/git-submodule.sh +++ b/scripts/git-submodule.sh @@ -9,13 +9,22 @@ command=$1 shift maybe_modules="$@" -# if not running in a git checkout, do nothing -test "$command" = "ignore" && exit 0 - +test -z "$maybe_modules" && exit 0 test -z "$GIT" && GIT=$(command -v git) cd "$(dirname "$0")/.." +no_git_error= +if ! test -e ".git"; then + no_git_error='no git checkout exists' +elif test -z "$GIT"; then + no_git_error='git binary not found' +fi + +is_git() { + test -z "$no_git_error" +} + update_error() { echo "$0: $*" echo @@ -34,7 +43,7 @@ update_error() { } validate_error() { - if test "$1" = "validate"; then + if is_git && test "$1" = "validate"; then echo "GIT submodules checkout is out of date, and submodules" echo "configured for validate only. Please run" echo " scripts/git-submodule.sh update $maybe_modules" @@ -51,42 +60,42 @@ check_updated() { test "$CURSTATUS" = "$OLDSTATUS" } -if test -n "$maybe_modules" && ! test -e ".git" -then - echo "$0: unexpectedly called with submodules but no git checkout exists" - exit 1 -fi - -if test -n "$maybe_modules" && test -z "$GIT" -then - echo "$0: unexpectedly called with submodules but git binary not found" - exit 1 +if is_git; then + test -e $substat || touch $substat + modules="" + for m in $maybe_modules + do + $GIT submodule status $m 1> /dev/null 2>&1 + if test $? = 0 + then + modules="$modules $m" + grep $m $substat > /dev/null 2>&1 || $GIT submodule status $module >> $substat + else + echo "warn: ignoring non-existent submodule $m" + fi + done +else + modules=$maybe_modules fi -modules="" -for m in $maybe_modules -do - $GIT submodule status $m 1> /dev/null 2>&1 - if test $? 
= 0 - then - modules="$modules $m" - else - echo "warn: ignoring non-existent submodule $m" - fi -done - case "$command" in status|validate) - test -f "$substat" || validate_error "$command" - test -z "$maybe_modules" && exit 0 for module in $modules; do - check_updated $module || validate_error "$command" + if is_git; then + check_updated $module || validate_error "$command" + elif ! (set xyz "$module"/* && test -e "$2"); then + # The directory does not exist or it contains no files + echo "$0: sources not available for $module and $no_git_error" + validate_error "$command" + fi done - exit 0 ;; + update) - test -e $substat || touch $substat - test -z "$maybe_modules" && exit 0 + is_git || { + echo "$0: unexpectedly called with submodules but $no_git_error" + exit 1 + } $GIT submodule update --init $modules 1>/dev/null test $? -ne 0 && update_error "failed to update modules" diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 353fc48567..a1e77698ba 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1402,25 +1402,27 @@ void arm_cpu_post_init(Object *obj) * KVM does not currently allow us to lie to the guest about its * ID/feature registers, so the guest always sees what the host has. */ - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) - ? cpu_isar_feature(aa64_fp_simd, cpu) - : cpu_isar_feature(aa32_vfp, cpu)) { - cpu->has_vfp = true; - if (!kvm_enabled()) { - qdev_property_add_static(DEVICE(obj), &arm_cpu_has_vfp_property); + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + if (cpu_isar_feature(aa64_fp_simd, cpu)) { + cpu->has_vfp = true; + cpu->has_vfp_d32 = true; + if (tcg_enabled() || qtest_enabled()) { + qdev_property_add_static(DEVICE(obj), + &arm_cpu_has_vfp_property); + } } - } - - if (cpu->has_vfp && cpu_isar_feature(aa32_simd_r32, cpu)) { - cpu->has_vfp_d32 = true; - if (!kvm_enabled()) { + } else if (cpu_isar_feature(aa32_vfp, cpu)) { + cpu->has_vfp = true; + if (cpu_isar_feature(aa32_simd_r32, cpu)) { + cpu->has_vfp_d32 = true; /* * The permitted values of the SIMDReg bits [3:0] on * Armv8-A are either 0b0000 and 0b0010. On such CPUs, * make sure that has_vfp_d32 can not be set to false. */ - if (!(arm_feature(&cpu->env, ARM_FEATURE_V8) && - !arm_feature(&cpu->env, ARM_FEATURE_M))) { + if ((tcg_enabled() || qtest_enabled()) + && !(arm_feature(&cpu->env, ARM_FEATURE_V8) + && !arm_feature(&cpu->env, ARM_FEATURE_M))) { qdev_property_add_static(DEVICE(obj), &arm_cpu_has_vfp_d32_property); } @@ -1989,6 +1991,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPSDBG, 0); cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, EL3, 0); + + /* Disable the realm management extension, which requires EL3. 
*/ + cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, + ID_AA64PFR0, RME, 0); } if (!cpu->has_el2) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index af0119addf..11c3850ad9 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -57,6 +57,7 @@ #define EXCP_UNALIGNED 22 /* v7M UNALIGNED UsageFault */ #define EXCP_DIVBYZERO 23 /* v7M DIVBYZERO UsageFault */ #define EXCP_VSERR 24 +#define EXCP_GPC 25 /* v9 Granule Protection Check Fault */ /* NB: add new EXCP_ defines to the array in arm_log_exception() too */ #define ARMV7M_EXCP_RESET 1 @@ -541,6 +542,11 @@ typedef struct CPUArchState { uint64_t fgt_read[2]; /* HFGRTR, HDFGRTR */ uint64_t fgt_write[2]; /* HFGWTR, HDFGWTR */ uint64_t fgt_exec[1]; /* HFGITR */ + + /* RME registers */ + uint64_t gpccr_el3; + uint64_t gptbr_el3; + uint64_t mfar_el3; } cp15; struct { @@ -1055,6 +1061,7 @@ struct ArchCPU { uint64_t reset_cbar; uint32_t reset_auxcr; bool reset_hivecs; + uint8_t reset_l0gptsz; /* * Intermediate values used during property parsing. @@ -1655,7 +1662,7 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) #define HCR_TERR (1ULL << 36) #define HCR_TEA (1ULL << 37) #define HCR_MIOCNCE (1ULL << 38) -/* RES0 bit 39 */ +#define HCR_TME (1ULL << 39) #define HCR_APK (1ULL << 40) #define HCR_API (1ULL << 41) #define HCR_NV (1ULL << 42) @@ -1664,7 +1671,7 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) #define HCR_NV2 (1ULL << 45) #define HCR_FWB (1ULL << 46) #define HCR_FIEN (1ULL << 47) -/* RES0 bit 48 */ +#define HCR_GPF (1ULL << 48) #define HCR_TID4 (1ULL << 49) #define HCR_TICAB (1ULL << 50) #define HCR_AMVOFFEN (1ULL << 51) @@ -1729,6 +1736,7 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) #define SCR_TRNDR (1ULL << 40) #define SCR_ENTP2 (1ULL << 41) #define SCR_GPF (1ULL << 48) +#define SCR_NSE (1ULL << 62) #define HSTR_TTEE (1 << 16) #define HSTR_TJDBX (1 << 17) @@ -2195,6 +2203,7 @@ FIELD(ID_AA64PFR0, SEL2, 36, 4) FIELD(ID_AA64PFR0, MPAM, 40, 4) FIELD(ID_AA64PFR0, AMU, 44, 4) FIELD(ID_AA64PFR0, DIT, 48, 4) +FIELD(ID_AA64PFR0, RME, 52, 4) FIELD(ID_AA64PFR0, CSV2, 56, 4) FIELD(ID_AA64PFR0, CSV3, 60, 4) @@ -2339,6 +2348,19 @@ FIELD(MVFR1, SIMDFMAC, 28, 4) FIELD(MVFR2, SIMDMISC, 0, 4) FIELD(MVFR2, FPMISC, 4, 4) +FIELD(GPCCR, PPS, 0, 3) +FIELD(GPCCR, IRGN, 8, 2) +FIELD(GPCCR, ORGN, 10, 2) +FIELD(GPCCR, SH, 12, 2) +FIELD(GPCCR, PGS, 14, 2) +FIELD(GPCCR, GPC, 16, 1) +FIELD(GPCCR, GPCP, 17, 1) +FIELD(GPCCR, L0GPTSZ, 20, 4) + +FIELD(MFAR, FPA, 12, 40) +FIELD(MFAR, NSE, 62, 1) +FIELD(MFAR, NS, 63, 1) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF @@ -2393,25 +2415,53 @@ static inline int arm_feature(CPUARMState *env, int feature) void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp); -#if !defined(CONFIG_USER_ONLY) /* + * ARM v9 security states. + * The ordering of the enumeration corresponds to the low 2 bits + * of the GPI value, and (except for Root) the concat of NSE:NS. + */ + +typedef enum ARMSecuritySpace { + ARMSS_Secure = 0, + ARMSS_NonSecure = 1, + ARMSS_Root = 2, + ARMSS_Realm = 3, +} ARMSecuritySpace; + +/* Return true if @space is secure, in the pre-v9 sense. */ +static inline bool arm_space_is_secure(ARMSecuritySpace space) +{ + return space == ARMSS_Secure || space == ARMSS_Root; +} + +/* Return the ARMSecuritySpace for @secure, assuming !RME or EL[0-2]. 
*/ +static inline ARMSecuritySpace arm_secure_to_space(bool secure) +{ + return secure ? ARMSS_Secure : ARMSS_NonSecure; +} + +#if !defined(CONFIG_USER_ONLY) +/** + * arm_security_space_below_el3: + * @env: cpu context + * + * Return the security space of exception levels below EL3, following + * an exception return to those levels. Unlike arm_security_space, + * this doesn't care about the current EL. + */ +ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env); + +/** + * arm_is_secure_below_el3: + * @env: cpu context + * * Return true if exception levels below EL3 are in secure state, - * or would be following an exception return to that level. - * Unlike arm_is_secure() (which is always a question about the - * _current_ state of the CPU) this doesn't care about the current - * EL or mode. + * or would be following an exception return to those levels. */ static inline bool arm_is_secure_below_el3(CPUARMState *env) { - assert(!arm_feature(env, ARM_FEATURE_M)); - if (arm_feature(env, ARM_FEATURE_EL3)) { - return !(env->cp15.scr_el3 & SCR_NS); - } else { - /* If EL3 is not supported then the secure state is implementation - * defined, in which case QEMU defaults to non-secure. - */ - return false; - } + ARMSecuritySpace ss = arm_security_space_below_el3(env); + return ss == ARMSS_Secure; } /* Return true if the CPU is AArch64 EL3 or AArch32 Mon */ @@ -2431,16 +2481,23 @@ static inline bool arm_is_el3_or_mon(CPUARMState *env) return false; } -/* Return true if the processor is in secure state */ +/** + * arm_security_space: + * @env: cpu context + * + * Return the current security space of the cpu. + */ +ARMSecuritySpace arm_security_space(CPUARMState *env); + +/** + * arm_is_secure: + * @env: cpu context + * + * Return true if the processor is in secure state. + */ static inline bool arm_is_secure(CPUARMState *env) { - if (arm_feature(env, ARM_FEATURE_M)) { - return env->v7m.secure; - } - if (arm_is_el3_or_mon(env)) { - return true; - } - return arm_is_secure_below_el3(env); + return arm_space_is_secure(arm_security_space(env)); } /* @@ -2459,11 +2516,21 @@ static inline bool arm_is_el2_enabled(CPUARMState *env) } #else +static inline ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env) +{ + return ARMSS_NonSecure; +} + static inline bool arm_is_secure_below_el3(CPUARMState *env) { return false; } +static inline ARMSecuritySpace arm_security_space(CPUARMState *env) +{ + return ARMSS_NonSecure; +} + static inline bool arm_is_secure(CPUARMState *env) { return false; @@ -2794,18 +2861,20 @@ typedef enum ARMMMUIdx { ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, - /* TLBs with 1-1 mapping to the physical address spaces. */ - ARMMMUIdx_Phys_NS = 8 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_S = 9 | ARM_MMU_IDX_A, - /* * Used for second stage of an S12 page table walk, or for descriptor * loads during first stage of an S1 page table walk. Note that both * are in use simultaneously for SecureEL2: the security state for * the S2 ptw is selected by the NS bit from the S1 ptw. */ - ARMMMUIdx_Stage2 = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2_S = 11 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A, + + /* TLBs with 1-1 mapping to the physical address spaces. 
*/ + ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A, /* * These are not allocated TLBs and are used only for AT system @@ -2869,6 +2938,23 @@ typedef enum ARMASIdx { ARMASIdx_TagS = 3, } ARMASIdx; +static inline ARMMMUIdx arm_space_to_phys(ARMSecuritySpace space) +{ + /* Assert the relative order of the physical mmu indexes. */ + QEMU_BUILD_BUG_ON(ARMSS_Secure != 0); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS != ARMMMUIdx_Phys_S + ARMSS_NonSecure); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_Root != ARMMMUIdx_Phys_S + ARMSS_Root); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_Realm != ARMMMUIdx_Phys_S + ARMSS_Realm); + + return ARMMMUIdx_Phys_S + space; +} + +static inline ARMSecuritySpace arm_phys_to_space(ARMMMUIdx idx) +{ + assert(idx >= ARMMMUIdx_Phys_S && idx <= ARMMMUIdx_Phys_Realm); + return idx - ARMMMUIdx_Phys_S; +} + static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu) { /* If all the CLIDR.Ctypem bits are 0 there are no caches, and @@ -3814,6 +3900,11 @@ static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0; } +static inline bool isar_feature_aa64_rme(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RME) != 0; +} + static inline bool isar_feature_aa64_vh(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0; diff --git a/target/arm/helper.c b/target/arm/helper.c index d4bee43bd0..323cadd3c8 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -1855,6 +1855,9 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) } if (cpu_isar_feature(aa64_sel2, cpu)) { valid_mask |= SCR_EEL2; + } else if (cpu_isar_feature(aa64_rme, cpu)) { + /* With RME and without SEL2, NS is RES1 (R_GSWWH, I_DJJQJ). */ + value |= SCR_NS; } if (cpu_isar_feature(aa64_mte, cpu)) { valid_mask |= SCR_ATA; @@ -1874,6 +1877,9 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) if (cpu_isar_feature(aa64_fgt, cpu)) { valid_mask |= SCR_FGTEN; } + if (cpu_isar_feature(aa64_rme, cpu)) { + valid_mask |= SCR_NSE | SCR_GPF; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); if (cpu_isar_feature(aa32_ras, cpu)) { @@ -1903,10 +1909,10 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) env->cp15.scr_el3 = value; /* - * If SCR_EL3.NS changes, i.e. arm_is_secure_below_el3, then + * If SCR_EL3.{NS,NSE} changes, i.e. change of security state, * we must invalidate all TLBs below EL3. */ - if (changed & SCR_NS) { + if (changed & (SCR_NS | SCR_NSE)) { tlb_flush_by_mmuidx(env_cpu(env), (ARMMMUIdxBit_E10_0 | ARMMMUIdxBit_E20_0 | ARMMMUIdxBit_E10_1 | @@ -5654,6 +5660,9 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (cpu_isar_feature(aa64_fwb, cpu)) { valid_mask |= HCR_FWB; } + if (cpu_isar_feature(aa64_rme, cpu)) { + valid_mask |= HCR_GPF; + } } if (cpu_isar_feature(any_evt, cpu)) { @@ -6901,6 +6910,83 @@ static const ARMCPRegInfo sme_reginfo[] = { .access = PL2_RW, .accessfn = access_esm, .type = ARM_CP_CONST, .resetvalue = 0 }, }; + +static void tlbi_aa64_paall_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush(cs); +} + +static void gpccr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* L0GPTSZ is RO; other bits not mentioned are RES0. 
 */
+    uint64_t rw_mask = R_GPCCR_PPS_MASK | R_GPCCR_IRGN_MASK |
+        R_GPCCR_ORGN_MASK | R_GPCCR_SH_MASK | R_GPCCR_PGS_MASK |
+        R_GPCCR_GPC_MASK | R_GPCCR_GPCP_MASK;
+
+    env->cp15.gpccr_el3 = (value & rw_mask) | (env->cp15.gpccr_el3 & ~rw_mask);
+}
+
+static void gpccr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    env->cp15.gpccr_el3 = FIELD_DP64(0, GPCCR, L0GPTSZ,
+                                     env_archcpu(env)->reset_l0gptsz);
+}
+
+static void tlbi_aa64_paallos_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                    uint64_t value)
+{
+    CPUState *cs = env_cpu(env);
+
+    tlb_flush_all_cpus_synced(cs);
+}
+
+static const ARMCPRegInfo rme_reginfo[] = {
+    { .name = "GPCCR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 6,
+      .access = PL3_RW, .writefn = gpccr_write, .resetfn = gpccr_reset,
+      .fieldoffset = offsetof(CPUARMState, cp15.gpccr_el3) },
+    { .name = "GPTBR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 4,
+      .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.gptbr_el3) },
+    { .name = "MFAR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 0, .opc2 = 5,
+      .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mfar_el3) },
+    { .name = "TLBI_PAALL", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 4,
+      .access = PL3_W, .type = ARM_CP_NO_RAW,
+      .writefn = tlbi_aa64_paall_write },
+    { .name = "TLBI_PAALLOS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 4,
+      .access = PL3_W, .type = ARM_CP_NO_RAW,
+      .writefn = tlbi_aa64_paallos_write },
+    /*
+     * QEMU does not have a way to invalidate by physical address, thus
+     * invalidating a range of physical addresses is accomplished by
+     * flushing all TLB entries in the outer shareable domain,
+     * just like PAALLOS.
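+     *
+     * (Editor's note: both by-range operations below therefore reuse
+     * tlbi_aa64_paallos_write(), i.e. tlb_flush_all_cpus_synced().)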
+ */ + { .name = "TLBI_RPALOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 7, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, + { .name = "TLBI_RPAOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 3, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, + { .name = "DC_CIPAPA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NOP }, +}; + +static const ARMCPRegInfo rme_mte_reginfo[] = { + { .name = "DC_CIGDPAPA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NOP }, +}; #endif /* TARGET_AARCH64 */ static void define_pmu_regs(ARMCPU *cpu) @@ -9121,6 +9207,13 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_fgt, cpu)) { define_arm_cp_regs(cpu, fgt_reginfo); } + + if (cpu_isar_feature(aa64_rme, cpu)) { + define_arm_cp_regs(cpu, rme_reginfo); + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, rme_mte_reginfo); + } + } #endif if (cpu_isar_feature(any_predinv, cpu)) { @@ -10091,6 +10184,7 @@ void arm_log_exception(CPUState *cs) [EXCP_UNALIGNED] = "v7M UNALIGNED UsageFault", [EXCP_DIVBYZERO] = "v7M DIVBYZERO UsageFault", [EXCP_VSERR] = "Virtual SERR", + [EXCP_GPC] = "Granule Protection Check", }; if (idx >= 0 && idx < ARRAY_SIZE(excnames)) { @@ -10822,6 +10916,10 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } switch (cs->exception_index) { + case EXCP_GPC: + qemu_log_mask(CPU_LOG_INT, "...with MFAR 0x%" PRIx64 "\n", + env->cp15.mfar_el3); + /* fall through */ case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: /* @@ -12043,3 +12141,63 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, } } #endif + +#ifndef CONFIG_USER_ONLY +ARMSecuritySpace arm_security_space(CPUARMState *env) +{ + if (arm_feature(env, ARM_FEATURE_M)) { + return arm_secure_to_space(env->v7m.secure); + } + + /* + * If EL3 is not supported then the secure state is implementation + * defined, in which case QEMU defaults to non-secure. + */ + if (!arm_feature(env, ARM_FEATURE_EL3)) { + return ARMSS_NonSecure; + } + + /* Check for AArch64 EL3 or AArch32 Mon. */ + if (is_a64(env)) { + if (extract32(env->pstate, 2, 2) == 3) { + if (cpu_isar_feature(aa64_rme, env_archcpu(env))) { + return ARMSS_Root; + } else { + return ARMSS_Secure; + } + } + } else { + if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) { + return ARMSS_Secure; + } + } + + return arm_security_space_below_el3(env); +} + +ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env) +{ + assert(!arm_feature(env, ARM_FEATURE_M)); + + /* + * If EL3 is not supported then the secure state is implementation + * defined, in which case QEMU defaults to non-secure. + */ + if (!arm_feature(env, ARM_FEATURE_EL3)) { + return ARMSS_NonSecure; + } + + /* + * Note NSE cannot be set without RME, and NSE & !NS is Reserved. + * Ignoring NSE when !NS retains consistency without having to + * modify other predicates. 
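+     *
+     * Editor's summary of the resulting decode of SCR_EL3.{NSE,NS}:
+     *
+     *   NSE NS -> space below EL3
+     *    x  0     Secure     (NSE ignored; 1:0 is Reserved)
+     *    0  1     NonSecure
+     *    1  1     Realm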
+ */ + if (!(env->cp15.scr_el3 & SCR_NS)) { + return ARMSS_Secure; + } else if (env->cp15.scr_el3 & SCR_NSE) { + return ARMSS_Realm; + } else { + return ARMSS_NonSecure; + } +} +#endif /* !CONFIG_USER_ONLY */ diff --git a/target/arm/internals.h b/target/arm/internals.h index e3029bdc37..0f01bc32a8 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -358,14 +358,27 @@ typedef enum ARMFaultType { ARMFault_ICacheMaint, ARMFault_QEMU_NSCExec, /* v8M: NS executing in S&NSC memory */ ARMFault_QEMU_SFault, /* v8M: SecureFault INVTRAN, INVEP or AUVIOL */ + ARMFault_GPCFOnWalk, + ARMFault_GPCFOnOutput, } ARMFaultType; +typedef enum ARMGPCF { + GPCF_None, + GPCF_AddressSize, + GPCF_Walk, + GPCF_EABT, + GPCF_Fail, +} ARMGPCF; + /** * ARMMMUFaultInfo: Information describing an ARM MMU Fault * @type: Type of fault + * @gpcf: Subtype of ARMFault_GPCFOn{Walk,Output}. * @level: Table walk level (for translation, access flag and permission faults) * @domain: Domain of the fault address (for non-LPAE CPUs only) * @s2addr: Address that caused a fault at stage 2 + * @paddr: physical address that caused a fault for gpc + * @paddr_space: physical address space that caused a fault for gpc * @stage2: True if we faulted at stage 2 * @s1ptw: True if we faulted at stage 2 while doing a stage 1 page-table walk * @s1ns: True if we faulted on a non-secure IPA while in secure state @@ -374,7 +387,10 @@ typedef enum ARMFaultType { typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; struct ARMMMUFaultInfo { ARMFaultType type; + ARMGPCF gpcf; target_ulong s2addr; + target_ulong paddr; + ARMSecuritySpace paddr_space; int level; int domain; bool stage2; @@ -548,6 +564,17 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi) case ARMFault_Exclusive: fsc = 0x35; break; + case ARMFault_GPCFOnWalk: + assert(fi->level >= -1 && fi->level <= 3); + if (fi->level < 0) { + fsc = 0b100011; + } else { + fsc = 0b100100 | fi->level; + } + break; + case ARMFault_GPCFOnOutput: + fsc = 0b101000; + break; default: /* Other faults can't occur in a context that requires a * long-format status code. diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 37bcb17a9e..6015121b99 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -21,28 +21,35 @@ typedef struct S1Translate { ARMMMUIdx in_mmu_idx; ARMMMUIdx in_ptw_idx; + ARMSecuritySpace in_space; bool in_secure; bool in_debug; + /* + * If this is stage 2 of a stage 1+2 page table walk, then this must + * be true if stage 1 is an EL0 access; otherwise this is ignored. + * Stage 2 is indicated by in_mmu_idx set to ARMMMUIdx_Stage2{,_S}. 
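+     *
+     * For example (editor's note), stage 2 of a stage 1+2 walk for an
+     * EL0 access would set .in_mmu_idx = ARMMMUIdx_Stage2 and
+     * .in_s1_is_el0 = true, so that get_S2prot() can apply the
+     * TTS2UXN execute-never rules for unprivileged accesses.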
+ */ + bool in_s1_is_el0; bool out_secure; bool out_rw; bool out_be; + ARMSecuritySpace out_space; hwaddr out_virt; hwaddr out_phys; void *out_host; } S1Translate; -static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, - uint64_t address, - MMUAccessType access_type, bool s1_is_el0, - GetPhysAddrResult *result, ARMMMUFaultInfo *fi) - __attribute__((nonnull)); +static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi); -static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, - target_ulong address, - MMUAccessType access_type, - GetPhysAddrResult *result, - ARMMMUFaultInfo *fi) - __attribute__((nonnull)); +static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi); /* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ static const uint8_t pamax_map[] = { @@ -215,8 +222,10 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, case ARMMMUIdx_E3: break; - case ARMMMUIdx_Phys_NS: case ARMMMUIdx_Phys_S: + case ARMMMUIdx_Phys_NS: + case ARMMMUIdx_Phys_Root: + case ARMMMUIdx_Phys_Realm: /* No translation for physical address spaces. */ return true; @@ -227,6 +236,197 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; } +static bool granule_protection_check(CPUARMState *env, uint64_t paddress, + ARMSecuritySpace pspace, + ARMMMUFaultInfo *fi) +{ + MemTxAttrs attrs = { + .secure = true, + .space = ARMSS_Root, + }; + ARMCPU *cpu = env_archcpu(env); + uint64_t gpccr = env->cp15.gpccr_el3; + unsigned pps, pgs, l0gptsz, level = 0; + uint64_t tableaddr, pps_mask, align, entry, index; + AddressSpace *as; + MemTxResult result; + int gpi; + + if (!FIELD_EX64(gpccr, GPCCR, GPC)) { + return true; + } + + /* + * GPC Priority 1 (R_GMGRR): + * R_JWCSM: If the configuration of GPCCR_EL3 is invalid, + * the access fails as GPT walk fault at level 0. + */ + + /* + * Configuration of PPS to a value exceeding the implemented + * physical address size is invalid. + */ + pps = FIELD_EX64(gpccr, GPCCR, PPS); + if (pps > FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE)) { + goto fault_walk; + } + pps = pamax_map[pps]; + pps_mask = MAKE_64BIT_MASK(0, pps); + + switch (FIELD_EX64(gpccr, GPCCR, SH)) { + case 0b10: /* outer shareable */ + break; + case 0b00: /* non-shareable */ + case 0b11: /* inner shareable */ + /* Inner and Outer non-cacheable requires Outer shareable. */ + if (FIELD_EX64(gpccr, GPCCR, ORGN) == 0 && + FIELD_EX64(gpccr, GPCCR, IRGN) == 0) { + goto fault_walk; + } + break; + default: /* reserved */ + goto fault_walk; + } + + switch (FIELD_EX64(gpccr, GPCCR, PGS)) { + case 0b00: /* 4KB */ + pgs = 12; + break; + case 0b01: /* 64KB */ + pgs = 16; + break; + case 0b10: /* 16KB */ + pgs = 14; + break; + default: /* reserved */ + goto fault_walk; + } + + /* Note this field is read-only and fixed at reset. */ + l0gptsz = 30 + FIELD_EX64(gpccr, GPCCR, L0GPTSZ); + + /* + * GPC Priority 2: Secure, Realm or Root address exceeds PPS. + * R_CPDSB: A NonSecure physical address input exceeding PPS + * does not experience any fault. + */ + if (paddress & ~pps_mask) { + if (pspace == ARMSS_NonSecure) { + return true; + } + goto fault_size; + } + + /* GPC Priority 3: the base address of GPTBR_EL3 exceeds PPS. 
*/ + tableaddr = env->cp15.gptbr_el3 << 12; + if (tableaddr & ~pps_mask) { + goto fault_size; + } + + /* + * BADDR is aligned per a function of PPS and L0GPTSZ. + * These bits of GPTBR_EL3 are RES0, but are not a configuration error, + * unlike the RES0 bits of the GPT entries (R_XNKFZ). + */ + align = MAX(pps - l0gptsz + 3, 12); + align = MAKE_64BIT_MASK(0, align); + tableaddr &= ~align; + + as = arm_addressspace(env_cpu(env), attrs); + + /* Level 0 lookup. */ + index = extract64(paddress, l0gptsz, pps - l0gptsz); + tableaddr += index * 8; + entry = address_space_ldq_le(as, tableaddr, attrs, &result); + if (result != MEMTX_OK) { + goto fault_eabt; + } + + switch (extract32(entry, 0, 4)) { + case 1: /* block descriptor */ + if (entry >> 8) { + goto fault_walk; /* RES0 bits not 0 */ + } + gpi = extract32(entry, 4, 4); + goto found; + case 3: /* table descriptor */ + tableaddr = entry & ~0xf; + align = MAX(l0gptsz - pgs - 1, 12); + align = MAKE_64BIT_MASK(0, align); + if (tableaddr & (~pps_mask | align)) { + goto fault_walk; /* RES0 bits not 0 */ + } + break; + default: /* invalid */ + goto fault_walk; + } + + /* Level 1 lookup */ + level = 1; + index = extract64(paddress, pgs + 4, l0gptsz - pgs - 4); + tableaddr += index * 8; + entry = address_space_ldq_le(as, tableaddr, attrs, &result); + if (result != MEMTX_OK) { + goto fault_eabt; + } + + switch (extract32(entry, 0, 4)) { + case 1: /* contiguous descriptor */ + if (entry >> 10) { + goto fault_walk; /* RES0 bits not 0 */ + } + /* + * Because the softmmu tlb only works on units of TARGET_PAGE_SIZE, + * and because we cannot invalidate by pa, and thus will always + * flush entire tlbs, we don't actually care about the range here + * and can simply extract the GPI as the result. + */ + if (extract32(entry, 8, 2) == 0) { + goto fault_walk; /* reserved contig */ + } + gpi = extract32(entry, 4, 4); + break; + default: + index = extract64(paddress, pgs, 4); + gpi = extract64(entry, index * 4, 4); + break; + } + + found: + switch (gpi) { + case 0b0000: /* no access */ + break; + case 0b1111: /* all access */ + return true; + case 0b1000: + case 0b1001: + case 0b1010: + case 0b1011: + if (pspace == (gpi & 3)) { + return true; + } + break; + default: + goto fault_walk; /* reserved */ + } + + fi->gpcf = GPCF_Fail; + goto fault_common; + fault_eabt: + fi->gpcf = GPCF_EABT; + goto fault_common; + fault_size: + fi->gpcf = GPCF_AddressSize; + goto fault_common; + fault_walk: + fi->gpcf = GPCF_Walk; + fault_common: + fi->level = level; + fi->paddr = paddress; + fi->paddr_space = pspace; + return false; +} + static bool S2_attrs_are_device(uint64_t hcr, uint8_t attrs) { /* @@ -249,6 +449,7 @@ static bool S2_attrs_are_device(uint64_t hcr, uint8_t attrs) static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, hwaddr addr, ARMMMUFaultInfo *fi) { + ARMSecuritySpace space = ptw->in_space; bool is_secure = ptw->in_secure; ARMMMUIdx mmu_idx = ptw->in_mmu_idx; ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx; @@ -261,30 +462,27 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, * From gdbstub, do not use softmmu so that we don't modify the * state of the cpu at all, including softmmu tlb contents. 
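         *
         * (Editor's note: this debug path recurses through
         * get_phys_addr_gpc() rather than get_phys_addr_nogpc(), so the
         * granule protection check also covers page-table loads; a
         * GPCFOnOutput fault on the walk address is reclassified as
         * GPCFOnWalk at the "fail" label below.)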
*/ - if (regime_is_stage2(s2_mmu_idx)) { - S1Translate s2ptw = { - .in_mmu_idx = s2_mmu_idx, - .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx), - .in_secure = s2_mmu_idx == ARMMMUIdx_Stage2_S, - .in_debug = true, - }; - GetPhysAddrResult s2 = { }; - - if (get_phys_addr_lpae(env, &s2ptw, addr, MMU_DATA_LOAD, - false, &s2, fi)) { - goto fail; - } - ptw->out_phys = s2.f.phys_addr; - pte_attrs = s2.cacheattrs.attrs; - ptw->out_secure = s2.f.attrs.secure; - } else { - /* Regime is physical. */ - ptw->out_phys = addr; - pte_attrs = 0; - ptw->out_secure = s2_mmu_idx == ARMMMUIdx_Phys_S; + S1Translate s2ptw = { + .in_mmu_idx = s2_mmu_idx, + .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx), + .in_secure = s2_mmu_idx == ARMMMUIdx_Stage2_S, + .in_space = (s2_mmu_idx == ARMMMUIdx_Stage2_S ? ARMSS_Secure + : space == ARMSS_Realm ? ARMSS_Realm + : ARMSS_NonSecure), + .in_debug = true, + }; + GetPhysAddrResult s2 = { }; + + if (get_phys_addr_gpc(env, &s2ptw, addr, MMU_DATA_LOAD, &s2, fi)) { + goto fail; } + + ptw->out_phys = s2.f.phys_addr; + pte_attrs = s2.cacheattrs.attrs; ptw->out_host = NULL; ptw->out_rw = false; + ptw->out_secure = s2.f.attrs.secure; + ptw->out_space = s2.f.attrs.space; } else { #ifdef CONFIG_TCG CPUTLBEntryFull *full; @@ -303,6 +501,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, ptw->out_rw = full->prot & PAGE_WRITE; pte_attrs = full->pte_attrs; ptw->out_secure = full->attrs.secure; + ptw->out_space = full->attrs.space; #else g_assert_not_reached(); #endif @@ -330,6 +529,9 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, fail: assert(fi->type != ARMFault_None); + if (fi->type == ARMFault_GPCFOnOutput) { + fi->type = ARMFault_GPCFOnWalk; + } fi->s2addr = addr; fi->stage2 = true; fi->s1ptw = true; @@ -355,7 +557,10 @@ static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, } } else { /* Page tables are in MMIO. */ - MemTxAttrs attrs = { .secure = ptw->out_secure }; + MemTxAttrs attrs = { + .secure = ptw->out_secure, + .space = ptw->out_space, + }; AddressSpace *as = arm_addressspace(cs, attrs); MemTxResult result = MEMTX_OK; @@ -398,7 +603,10 @@ static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, #endif } else { /* Page tables are in MMIO. */ - MemTxAttrs attrs = { .secure = ptw->out_secure }; + MemTxAttrs attrs = { + .secure = ptw->out_secure, + .space = ptw->out_space, + }; AddressSpace *as = arm_addressspace(cs, attrs); MemTxResult result = MEMTX_OK; @@ -909,6 +1117,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, * regime, because the attribute will already be non-secure. 
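     *
     * (Editor's note: with the new MemTxAttrs.space field, code that
     * forces attrs.secure must keep attrs.space in step, as the added
     * assignment below and the matching PMSAv8 hunk do.)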
*/ result->f.attrs.secure = false; + result->f.attrs.space = ARMSS_NonSecure; } result->f.phys_addr = phys_addr; return false; @@ -925,7 +1134,7 @@ do_fault: * @xn: XN (execute-never) bits * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 */ -static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) +static int get_S2prot_noexecute(int s2ap) { int prot = 0; @@ -935,6 +1144,12 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) if (s2ap & 2) { prot |= PAGE_WRITE; } + return prot; +} + +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) +{ + int prot = get_S2prot_noexecute(s2ap); if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { switch (xn) { @@ -972,12 +1187,14 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) * @mmu_idx: MMU index indicating required translation regime * @is_aa64: TRUE if AArch64 * @ap: The 2-bit simple AP (AP[2:1]) - * @ns: NS (non-secure) bit * @xn: XN (execute-never) bit * @pxn: PXN (privileged execute-never) bit + * @in_pa: The original input pa space + * @out_pa: The output pa space, modified by NSTable, NS, and NSE */ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, - int ap, int ns, int xn, int pxn) + int ap, int xn, int pxn, + ARMSecuritySpace in_pa, ARMSecuritySpace out_pa) { ARMCPU *cpu = env_archcpu(env); bool is_user = regime_is_user(env, mmu_idx); @@ -1010,8 +1227,39 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, } } - if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) { - return prot_rw; + if (in_pa != out_pa) { + switch (in_pa) { + case ARMSS_Root: + /* + * R_ZWRVD: permission fault for insn fetched from non-Root, + * I_WWBFB: SIF has no effect in EL3. + */ + return prot_rw; + case ARMSS_Realm: + /* + * R_PKTDS: permission fault for insn fetched from non-Realm, + * for Realm EL2 or EL2&0. The corresponding fault for EL1&0 + * happens during any stage2 translation. + */ + switch (mmu_idx) { + case ARMMMUIdx_E2: + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + return prot_rw; + default: + break; + } + break; + case ARMSS_Secure: + if (env->cp15.scr_el3 & SCR_SIF) { + return prot_rw; + } + break; + default: + /* Input NonSecure must have output NonSecure. */ + g_assert_not_reached(); + } } /* TODO have_wxn should be replaced with @@ -1242,22 +1490,16 @@ static int check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, uint64_t tcr, * @ptw: Current and next stage parameters for the walk. * @address: virtual address to get physical address for * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH - * @s1_is_el0: if @ptw->in_mmu_idx is ARMMMUIdx_Stage2 - * (so this is a stage 2 page table walk), - * must be true if this is stage 2 of a stage 1+2 - * walk for an EL0 access. If @mmu_idx is anything else, - * @s1_is_el0 is ignored. 
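 * (Editor's note: this flag now travels as S1Translate.in_s1_is_el0,
 * set once by the stage-1 caller, instead of being threaded through
 * as a separate parameter.)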
* @result: set on translation success, * @fi: set to fault info if the translation fails */ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint64_t address, - MMUAccessType access_type, bool s1_is_el0, + MMUAccessType access_type, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { ARMCPU *cpu = env_archcpu(env); ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - bool is_secure = ptw->in_secure; int32_t level; ARMVAParameters param; uint64_t ttbr; @@ -1268,12 +1510,12 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, int32_t stride; int addrsize, inputsize, outputsize; uint64_t tcr = regime_tcr(env, mmu_idx); - int ap, ns, xn, pxn; + int ap, xn, pxn; uint32_t el = regime_el(env, mmu_idx); uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); uint64_t descriptor, new_descriptor; - bool nstable; + ARMSecuritySpace out_space; /* TODO: This code does not support shareability levels. */ if (aarch64) { @@ -1435,32 +1677,32 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddrmask = MAKE_64BIT_MASK(0, 40); } descaddrmask &= ~indexmask_grainsize; - - /* - * Secure stage 1 accesses start with the page table in secure memory and - * can be downgraded to non-secure at any step. Non-secure accesses - * remain non-secure. We implement this by just ORing in the NSTable/NS - * bits at each step. - * Stage 2 never gets this kind of downgrade. - */ - tableattrs = is_secure ? 0 : (1 << 4); + tableattrs = 0; next_level: descaddr |= (address >> (stride * (4 - level))) & indexmask; descaddr &= ~7ULL; - nstable = !regime_is_stage2(mmu_idx) && extract32(tableattrs, 4, 1); - if (nstable) { + + /* + * Process the NSTable bit from the previous level. This changes + * the table address space and the output space from Secure to + * NonSecure. With RME, the EL3 translation regime does not change + * from Root to NonSecure. + */ + if (ptw->in_space == ARMSS_Secure + && !regime_is_stage2(mmu_idx) + && extract32(tableattrs, 4, 1)) { /* * Stage2_S -> Stage2 or Phys_S -> Phys_NS - * Assert that the non-secure idx are even, and relative order. + * Assert the relative order of the secure/non-secure indexes. */ - QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_NS & 1) != 0); - QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2 & 1) != 0); - QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS + 1 != ARMMMUIdx_Phys_S); - QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2 + 1 != ARMMMUIdx_Stage2_S); - ptw->in_ptw_idx &= ~1; + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_S + 1 != ARMMMUIdx_Phys_NS); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2_S + 1 != ARMMMUIdx_Stage2); + ptw->in_ptw_idx += 1; ptw->in_secure = false; + ptw->in_space = ARMSS_NonSecure; } + if (!S1_ptw_translate(env, ptw, descaddr, fi)) { goto do_fault; } @@ -1563,7 +1805,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, */ attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); if (!regime_is_stage2(mmu_idx)) { - attrs |= nstable << 5; /* NS */ + attrs |= !ptw->in_secure << 5; /* NS */ if (!param.hpd) { attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ /* @@ -1576,15 +1818,79 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, } ap = extract32(attrs, 6, 2); + out_space = ptw->in_space; if (regime_is_stage2(mmu_idx)) { - ns = mmu_idx == ARMMMUIdx_Stage2; - xn = extract64(attrs, 53, 2); - result->f.prot = get_S2prot(env, ap, xn, s1_is_el0); + /* + * R_GYNXY: For stage2 in Realm security state, bit 55 is NS. + * The bit remains ignored for other security states. 
+ * R_YMCSL: Executing an insn fetched from non-Realm causes + * a stage2 permission fault. + */ + if (out_space == ARMSS_Realm && extract64(attrs, 55, 1)) { + out_space = ARMSS_NonSecure; + result->f.prot = get_S2prot_noexecute(ap); + } else { + xn = extract64(attrs, 53, 2); + result->f.prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0); + } } else { - ns = extract32(attrs, 5, 1); + int nse, ns = extract32(attrs, 5, 1); + switch (out_space) { + case ARMSS_Root: + /* + * R_GVZML: Bit 11 becomes the NSE field in the EL3 regime. + * R_XTYPW: NSE and NS together select the output pa space. + */ + nse = extract32(attrs, 11, 1); + out_space = (nse << 1) | ns; + if (out_space == ARMSS_Secure && + !cpu_isar_feature(aa64_sel2, cpu)) { + out_space = ARMSS_NonSecure; + } + break; + case ARMSS_Secure: + if (ns) { + out_space = ARMSS_NonSecure; + } + break; + case ARMSS_Realm: + switch (mmu_idx) { + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + /* I_CZPRF: For Realm EL1&0 stage1, NS bit is RES0. */ + break; + case ARMMMUIdx_E2: + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + /* + * R_LYKFZ, R_WGRZN: For Realm EL2 and EL2&1, + * NS changes the output to non-secure space. + */ + if (ns) { + out_space = ARMSS_NonSecure; + } + break; + default: + g_assert_not_reached(); + } + break; + case ARMSS_NonSecure: + /* R_QRMFF: For NonSecure state, the NS bit is RES0. */ + break; + default: + g_assert_not_reached(); + } xn = extract64(attrs, 54, 1); pxn = extract64(attrs, 53, 1); - result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); + + /* + * Note that we modified ptw->in_space earlier for NSTable, but + * result->f.attrs retains a copy of the original security space. + */ + result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, xn, pxn, + result->f.attrs.space, out_space); } if (!(result->f.prot & (1 << access_type))) { @@ -1611,14 +1917,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, } } - if (ns) { - /* - * The NS bit will (as required by the architecture) have no effect if - * the CPU doesn't support TZ or this is a non-secure translation - * regime, because the attribute will already be non-secure. - */ - result->f.attrs.secure = false; - } + result->f.attrs.space = out_space; + result->f.attrs.secure = arm_space_is_secure(out_space); if (regime_is_stage2(mmu_idx)) { result->cacheattrs.is_s2_format = true; @@ -2402,6 +2702,7 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, */ if (sattrs.ns) { result->f.attrs.secure = false; + result->f.attrs.space = ARMSS_NonSecure; } else if (!secure) { /* * NS access to S memory must fault. 
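For reference, the R_XTYPW decode implemented above (out_space = (nse << 1) | ns) can be restated as a table. This is a sketch that assumes the ARMSecuritySpace enumerators are numbered Secure=0, NonSecure=1, Root=2, Realm=3, so the two descriptor bits concatenate directly; it restates the hunk rather than adding behavior:

/*
 * Sketch: {NSE,NS} -> output PA space, valid only in the EL3 (Root)
 * translation regime.  {0,0} = Secure, {0,1} = NonSecure,
 * {1,0} = Root, {1,1} = Realm.
 */
static ARMSecuritySpace decode_nse_ns_sketch(int nse, int ns)
{
    return (ARMSecuritySpace)((nse << 1) | ns);
}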
@@ -2668,14 +2969,16 @@ static bool get_phys_addr_disabled(CPUARMState *env, target_ulong address, ARMMMUFaultInfo *fi) { uint8_t memattr = 0x00; /* Device nGnRnE */ - uint8_t shareability = 0; /* non-sharable */ + uint8_t shareability = 0; /* non-shareable */ int r_el; switch (mmu_idx) { case ARMMMUIdx_Stage2: case ARMMMUIdx_Stage2_S: - case ARMMMUIdx_Phys_NS: case ARMMMUIdx_Phys_S: + case ARMMMUIdx_Phys_NS: + case ARMMMUIdx_Phys_Root: + case ARMMMUIdx_Phys_Realm: break; default: @@ -2725,7 +3028,7 @@ static bool get_phys_addr_disabled(CPUARMState *env, target_ulong address, } else { memattr = 0x44; /* Normal, NC, No */ } - shareability = 2; /* outer sharable */ + shareability = 2; /* outer shareable */ } result->cacheattrs.is_s2_format = false; break; @@ -2750,10 +3053,10 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, bool is_secure = ptw->in_secure; bool ret, ipa_secure; ARMCacheAttrs cacheattrs1; - bool is_el0; + ARMSecuritySpace ipa_space; uint64_t hcr; - ret = get_phys_addr_with_struct(env, ptw, address, access_type, result, fi); + ret = get_phys_addr_nogpc(env, ptw, address, access_type, result, fi); /* If S1 fails, return early. */ if (ret) { @@ -2762,10 +3065,12 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, ipa = result->f.phys_addr; ipa_secure = result->f.attrs.secure; + ipa_space = result->f.attrs.space; - is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0; + ptw->in_s1_is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0; ptw->in_mmu_idx = ipa_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; ptw->in_secure = ipa_secure; + ptw->in_space = ipa_space; ptw->in_ptw_idx = ptw_idx_for_stage_2(env, ptw->in_mmu_idx); /* @@ -2777,13 +3082,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, cacheattrs1 = result->cacheattrs; memset(result, 0, sizeof(*result)); - if (arm_feature(env, ARM_FEATURE_PMSA)) { - ret = get_phys_addr_pmsav8(env, ipa, access_type, - ptw->in_mmu_idx, is_secure, result, fi); - } else { - ret = get_phys_addr_lpae(env, ptw, ipa, access_type, - is_el0, result, fi); - } + ret = get_phys_addr_nogpc(env, ptw, ipa, access_type, result, fi); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ @@ -2843,7 +3142,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, return false; } -static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, +static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, target_ulong address, MMUAccessType access_type, GetPhysAddrResult *result, @@ -2854,15 +3153,18 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, ARMMMUIdx s1_mmu_idx; /* - * The page table entries may downgrade secure to non-secure, but - * cannot upgrade an non-secure translation regime's attributes - * to secure. + * The page table entries may downgrade Secure to NonSecure, but + * cannot upgrade a NonSecure translation regime's attributes + * to Secure or Realm. */ result->f.attrs.secure = is_secure; + result->f.attrs.space = ptw->in_space; switch (mmu_idx) { case ARMMMUIdx_Phys_S: case ARMMMUIdx_Phys_NS: + case ARMMMUIdx_Phys_Root: + case ARMMMUIdx_Phys_Realm: /* Checking Phys early avoids special casing later vs regime_el. */ return get_phys_addr_disabled(env, address, access_type, mmu_idx, is_secure, result, fi); @@ -2908,7 +3210,7 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, default: /* Single stage uses physical for ptw. */ - ptw->in_ptw_idx = is_secure ? 
ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; + ptw->in_ptw_idx = arm_space_to_phys(ptw->in_space); break; } @@ -2965,8 +3267,7 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, } if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, ptw, address, access_type, false, - result, fi); + return get_phys_addr_lpae(env, ptw, address, access_type, result, fi); } else if (arm_feature(env, ARM_FEATURE_V7) || regime_sctlr(env, mmu_idx) & SCTLR_XP) { return get_phys_addr_v6(env, ptw, address, access_type, result, fi); @@ -2975,6 +3276,23 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, } } +static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + if (get_phys_addr_nogpc(env, ptw, address, access_type, result, fi)) { + return true; + } + if (!granule_protection_check(env, result->f.phys_addr, + result->f.attrs.space, fi)) { + fi->type = ARMFault_GPCFOnOutput; + return true; + } + return false; +} + bool get_phys_addr_with_secure(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, bool is_secure, GetPhysAddrResult *result, @@ -2983,16 +3301,19 @@ bool get_phys_addr_with_secure(CPUARMState *env, target_ulong address, S1Translate ptw = { .in_mmu_idx = mmu_idx, .in_secure = is_secure, + .in_space = arm_secure_to_space(is_secure), }; - return get_phys_addr_with_struct(env, &ptw, address, access_type, - result, fi); + return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi); } bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { - bool is_secure; + S1Translate ptw = { + .in_mmu_idx = mmu_idx, + }; + ARMSecuritySpace ss; switch (mmu_idx) { case ARMMMUIdx_E10_0: @@ -3005,30 +3326,54 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: case ARMMMUIdx_E2: - is_secure = arm_is_secure_below_el3(env); + ss = arm_security_space_below_el3(env); break; case ARMMMUIdx_Stage2: + /* + * For Secure EL2, we need this index to be NonSecure; + * otherwise this will already be NonSecure or Realm. 
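+ * (arm_space_to_phys(), used for the single-stage walk above, is
+ * assumed here to map ARMSS_Secure/NonSecure/Root/Realm to the
+ * matching ARMMMUIdx_Phys_S/_NS/_Root/_Realm index.)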
+ */ + ss = arm_security_space_below_el3(env); + if (ss == ARMSS_Secure) { + ss = ARMSS_NonSecure; + } + break; case ARMMMUIdx_Phys_NS: case ARMMMUIdx_MPrivNegPri: case ARMMMUIdx_MUserNegPri: case ARMMMUIdx_MPriv: case ARMMMUIdx_MUser: - is_secure = false; + ss = ARMSS_NonSecure; break; - case ARMMMUIdx_E3: case ARMMMUIdx_Stage2_S: case ARMMMUIdx_Phys_S: case ARMMMUIdx_MSPrivNegPri: case ARMMMUIdx_MSUserNegPri: case ARMMMUIdx_MSPriv: case ARMMMUIdx_MSUser: - is_secure = true; + ss = ARMSS_Secure; + break; + case ARMMMUIdx_E3: + if (arm_feature(env, ARM_FEATURE_AARCH64) && + cpu_isar_feature(aa64_rme, env_archcpu(env))) { + ss = ARMSS_Root; + } else { + ss = ARMSS_Secure; + } + break; + case ARMMMUIdx_Phys_Root: + ss = ARMSS_Root; + break; + case ARMMMUIdx_Phys_Realm: + ss = ARMSS_Realm; break; default: g_assert_not_reached(); } - return get_phys_addr_with_secure(env, address, access_type, mmu_idx, - is_secure, result, fi); + + ptw.in_space = ss; + ptw.in_secure = arm_space_is_secure(ss); + return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi); } hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, @@ -3036,16 +3381,19 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + ARMSecuritySpace ss = arm_security_space(env); S1Translate ptw = { - .in_mmu_idx = arm_mmu_idx(env), - .in_secure = arm_is_secure(env), + .in_mmu_idx = mmu_idx, + .in_space = ss, + .in_secure = arm_space_is_secure(ss), .in_debug = true, }; GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; bool ret; - ret = get_phys_addr_with_struct(env, &ptw, addr, MMU_DATA_LOAD, &res, &fi); + ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, &res, &fi); *attrs = res.f.attrs; if (ret) { diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h index d27d1bc31f..62254d0e51 100644 --- a/target/arm/syndrome.h +++ b/target/arm/syndrome.h @@ -50,6 +50,7 @@ enum arm_exception_class { EC_SVEACCESSTRAP = 0x19, EC_ERETTRAP = 0x1a, EC_SMETRAP = 0x1d, + EC_GPC = 0x1e, EC_INSNABORT = 0x20, EC_INSNABORT_SAME_EL = 0x21, EC_PCALIGNMENT = 0x22, @@ -247,6 +248,15 @@ static inline uint32_t syn_bxjtrap(int cv, int cond, int rm) (cv << 24) | (cond << 20) | rm; } +static inline uint32_t syn_gpc(int s2ptw, int ind, int gpcsc, + int cm, int s1ptw, int wnr, int fsc) +{ + /* TODO: FEAT_NV2 adds VNCR */ + return (EC_GPC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (s2ptw << 21) + | (ind << 20) | (gpcsc << 14) | (cm << 8) | (s1ptw << 7) + | (wnr << 6) | fsc; +} + static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc) { return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 2976f94ae4..6fec2d8a57 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -142,6 +142,56 @@ static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name, cpu->sve_max_vq = max_vq; } +static bool cpu_arm_get_rme(Object *obj, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + return cpu_isar_feature(aa64_rme, cpu); +} + +static void cpu_arm_set_rme(Object *obj, bool value, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint64_t t; + + t = cpu->isar.id_aa64pfr0; + t = FIELD_DP64(t, ID_AA64PFR0, RME, value); + cpu->isar.id_aa64pfr0 = t; +} + +static void cpu_max_set_l0gptsz(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint32_t value; + + if 
(!visit_type_uint32(v, name, &value, errp)) { + return; + } + + /* Encode the value for the GPCCR_EL3 field. */ + switch (value) { + case 30: + case 34: + case 36: + case 39: + cpu->reset_l0gptsz = value - 30; + break; + default: + error_setg(errp, "invalid value for l0gptsz"); + error_append_hint(errp, "valid values are 30, 34, 36, 39\n"); + break; + } +} + +static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint32_t value = cpu->reset_l0gptsz + 30; + + visit_type_uint32(v, name, &value, errp); +} + static Property arm_cpu_lpa2_property = DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); @@ -700,6 +750,9 @@ void aarch64_max_tcg_initfn(Object *obj) aarch64_add_sme_properties(obj); object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq, cpu_max_set_sve_max_vq, NULL, NULL); + object_property_add_bool(obj, "x-rme", cpu_arm_get_rme, cpu_arm_set_rme); + object_property_add(obj, "x-l0gptsz", "uint32", cpu_max_get_l0gptsz, + cpu_max_set_l0gptsz, NULL, NULL); qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); } diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c index 8df36c2cbf..b22b2a4c6e 100644 --- a/target/arm/tcg/tlb_helper.c +++ b/target/arm/tcg/tlb_helper.c @@ -107,17 +107,106 @@ static uint32_t compute_fsr_fsc(CPUARMState *env, ARMMMUFaultInfo *fi, return fsr; } +static bool report_as_gpc_exception(ARMCPU *cpu, int current_el, + ARMMMUFaultInfo *fi) +{ + bool ret; + + switch (fi->gpcf) { + case GPCF_None: + return false; + case GPCF_AddressSize: + case GPCF_Walk: + case GPCF_EABT: + /* R_PYTGX: GPT faults are reported as GPC. */ + ret = true; + break; + case GPCF_Fail: + /* + * R_BLYPM: A GPF at EL3 is reported as insn or data abort. + * R_VBZMW, R_LXHQR: A GPF at EL[0-2] is reported as a GPC + * if SCR_EL3.GPF is set, otherwise an insn or data abort. + */ + ret = (cpu->env.cp15.scr_el3 & SCR_GPF) && current_el != 3; + break; + default: + g_assert_not_reached(); + } + + assert(cpu_isar_feature(aa64_rme, cpu)); + assert(fi->type == ARMFault_GPCFOnWalk || + fi->type == ARMFault_GPCFOnOutput); + if (fi->gpcf == GPCF_AddressSize) { + assert(fi->level == 0); + } else { + assert(fi->level >= 0 && fi->level <= 1); + } + + return ret; +} + +static unsigned encode_gpcsc(ARMMMUFaultInfo *fi) +{ + static uint8_t const gpcsc[] = { + [GPCF_AddressSize] = 0b000000, + [GPCF_Walk] = 0b000100, + [GPCF_Fail] = 0b001100, + [GPCF_EABT] = 0b010100, + }; + + /* Note that we've validated fi->gpcf and fi->level above. 
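+ * Worked example: a GPT walk fault at level 1 encodes as
+ * 0b000100 | 1 == 0b000101, and syn_gpc() then places this six-bit
+ * GPCSC value into syndrome bits [19:14].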
*/ + return gpcsc[fi->gpcf] | fi->level; +} + static G_NORETURN void arm_deliver_fault(ARMCPU *cpu, vaddr addr, MMUAccessType access_type, int mmu_idx, ARMMMUFaultInfo *fi) { CPUARMState *env = &cpu->env; - int target_el; + int target_el = exception_target_el(env); + int current_el = arm_current_el(env); bool same_el; uint32_t syn, exc, fsr, fsc; - target_el = exception_target_el(env); + if (report_as_gpc_exception(cpu, current_el, fi)) { + target_el = 3; + + fsr = compute_fsr_fsc(env, fi, target_el, mmu_idx, &fsc); + + syn = syn_gpc(fi->stage2 && fi->type == ARMFault_GPCFOnWalk, + access_type == MMU_INST_FETCH, + encode_gpcsc(fi), 0, fi->s1ptw, + access_type == MMU_DATA_STORE, fsc); + + env->cp15.mfar_el3 = fi->paddr; + switch (fi->paddr_space) { + case ARMSS_Secure: + break; + case ARMSS_NonSecure: + env->cp15.mfar_el3 |= R_MFAR_NS_MASK; + break; + case ARMSS_Root: + env->cp15.mfar_el3 |= R_MFAR_NSE_MASK; + break; + case ARMSS_Realm: + env->cp15.mfar_el3 |= R_MFAR_NSE_MASK | R_MFAR_NS_MASK; + break; + default: + g_assert_not_reached(); + } + + exc = EXCP_GPC; + goto do_raise; + } + + /* If SCR_EL3.GPF is unset, GPF may still be routed to EL2. */ + if (fi->gpcf == GPCF_Fail && target_el < 2) { + if (arm_hcr_el2_eff(env) & HCR_GPF) { + target_el = 2; + } + } + if (fi->stage2) { target_el = 2; env->cp15.hpfar_el2 = extract64(fi->s2addr, 12, 47) << 4; @@ -125,8 +214,8 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr, env->cp15.hpfar_el2 |= HPFAR_NS; } } - same_el = (arm_current_el(env) == target_el); + same_el = current_el == target_el; fsr = compute_fsr_fsc(env, fi, target_el, mmu_idx, &fsc); if (access_type == MMU_INST_FETCH) { @@ -143,6 +232,7 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr, exc = EXCP_DATA_ABORT; } + do_raise: env->exception.vaddress = addr; env->exception.fsr = fsr; raise_exception(env, exc, syn, target_el); diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index ff050626e6..225d358922 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -4329,7 +4329,7 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, /* Predicate register stores can be any multiple of 2. 
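 * The change just below also fixes a load/store mixup on this path:
 * the temporary must be loaded from the predicate register file
 * (tcg_gen_ld_i64) before being stored to guest memory; the old
 * tcg_gen_st_i64 instead wrote an uninitialized temporary back over
 * the register file.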
*/ if (len_remain >= 8) { t0 = tcg_temp_new_i64(); - tcg_gen_st_i64(t0, base, vofs + len_align); + tcg_gen_ld_i64(t0, base, vofs + len_align); tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); len_remain -= 8; len_align += 8; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 1242bd541a..c0fb6b3ad9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -637,7 +637,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER */ #ifdef TARGET_X86_64 -#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM) +#define TCG_EXT2_X86_64_FEATURES CPUID_EXT2_LM #else #define TCG_EXT2_X86_64_FEATURES 0 #endif @@ -645,9 +645,10 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \ CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \ CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB | \ - TCG_EXT2_X86_64_FEATURES) + CPUID_EXT2_SYSCALL | TCG_EXT2_X86_64_FEATURES) #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ - CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A) + CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \ + CPUID_EXT3_3DNOWPREFETCH) #define TCG_EXT4_FEATURES 0 #define TCG_SVM_FEATURES (CPUID_SVM_NPT | CPUID_SVM_VGIF | \ CPUID_SVM_SVME_ADDR_CHK) @@ -656,14 +657,21 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \ CPUID_7_0_EBX_PCOMMIT | CPUID_7_0_EBX_CLFLUSHOPT | \ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_FSGSBASE | \ - CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_AVX2) + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_RDSEED) /* missing: CPUID_7_0_EBX_HLE - CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, - CPUID_7_0_EBX_RDSEED */ + CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM */ + +#if defined CONFIG_SOFTMMU || defined CONFIG_LINUX +#define TCG_7_0_ECX_RDPID CPUID_7_0_ECX_RDPID +#else +#define TCG_7_0_ECX_RDPID 0 +#endif #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | \ /* CPUID_7_0_ECX_OSPKE is dynamic */ \ - CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS | CPUID_7_0_ECX_VAES) + CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS | CPUID_7_0_ECX_VAES | \ + TCG_7_0_ECX_RDPID) + #define TCG_7_0_EDX_FEATURES CPUID_7_0_EDX_FSRM #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC) @@ -678,6 +686,9 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_SGX_12_0_EBX_FEATURES 0 #define TCG_SGX_12_1_EAX_FEATURES 0 +#define TCG_8000_0008_EBX (CPUID_8000_0008_EBX_XSAVEERPTR | \ + CPUID_8000_0008_EBX_WBNOINVD) + FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_1_EDX] = { .type = CPUID_FEATURE_WORD, @@ -939,7 +950,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "amd-psfd", NULL, NULL, NULL, }, .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, - .tcg_features = 0, + .tcg_features = TCG_8000_0008_EBX, .unmigratable_flags = 0, }, [FEAT_8000_0021_EAX] = { @@ -6235,6 +6246,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx |= 1 << 1; /* CmpLegacy bit */ } } + if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && + !(env->hflags & HF_LMA_MASK)) { + *edx &= ~CPUID_EXT2_SYSCALL; + } break; case 0x80000002: case 0x80000003: diff --git a/target/i386/helper.h b/target/i386/helper.h index 48609c210b..ac2b04abd6 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -51,10 +51,8 @@ 
DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int) DEF_HELPER_1(sysenter, void, env) DEF_HELPER_2(sysexit, void, env, int) -#ifdef TARGET_X86_64 DEF_HELPER_2(syscall, void, env, int) DEF_HELPER_2(sysret, void, env, int) -#endif DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int) DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int) @@ -69,8 +67,8 @@ DEF_HELPER_2(into, void, env, int) DEF_HELPER_FLAGS_1(single_step, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_1(rechecking_single_step, void, env) DEF_HELPER_1(cpuid, void, env) +DEF_HELPER_FLAGS_1(rdpid, TCG_CALL_NO_WG, tl, env) DEF_HELPER_1(rdtsc, void, env) -DEF_HELPER_1(rdtscp, void, env) DEF_HELPER_FLAGS_1(rdpmc, TCG_CALL_NO_WG, noreturn, env) #ifndef CONFIG_USER_ONLY diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c index 5f7a3061ca..868f36ab7f 100644 --- a/target/i386/tcg/misc_helper.c +++ b/target/i386/tcg/misc_helper.c @@ -75,12 +75,6 @@ void helper_rdtsc(CPUX86State *env) env->regs[R_EDX] = (uint32_t)(val >> 32); } -void helper_rdtscp(CPUX86State *env) -{ - helper_rdtsc(env); - env->regs[R_ECX] = (uint32_t)(env->tsc_aux); -} - G_NORETURN void helper_rdpmc(CPUX86State *env) { if (((env->cr[4] & CR4_PCE_MASK) == 0 ) && @@ -137,3 +131,18 @@ void helper_wrpkru(CPUX86State *env, uint32_t ecx, uint64_t val) env->pkru = val; tlb_flush(cs); } + +target_ulong HELPER(rdpid)(CPUX86State *env) +{ +#if defined CONFIG_SOFTMMU + return env->tsc_aux; +#elif defined CONFIG_LINUX && defined CONFIG_GETCPU + unsigned cpu, node; + getcpu(&cpu, &node); + return (node << 12) | (cpu & 0xfff); +#elif defined CONFIG_SCHED_GETCPU + return sched_getcpu(); +#else + return 0; +#endif +} diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 03b58e94a2..e8d19c65fd 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -977,6 +977,7 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, e2); env->eip = offset; } +#endif /* TARGET_X86_64 */ void helper_sysret(CPUX86State *env, int dflag) { @@ -990,6 +991,7 @@ void helper_sysret(CPUX86State *env, int dflag) raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); } selector = (env->star >> 48) & 0xffff; +#ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | @@ -1015,7 +1017,9 @@ void helper_sysret(CPUX86State *env, int dflag) DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | (3 << DESC_DPL_SHIFT) | DESC_W_MASK | DESC_A_MASK); - } else { + } else +#endif + { env->eflags |= IF_MASK; cpu_x86_load_seg_cache(env, R_CS, selector | 3, 0, 0xffffffff, @@ -1030,7 +1034,6 @@ void helper_sysret(CPUX86State *env, int dflag) DESC_W_MASK | DESC_A_MASK); } } -#endif /* TARGET_X86_64 */ /* real mode interrupt */ static void do_interrupt_real(CPUX86State *env, int intno, int is_int, diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c index 2c9bd007ad..1cb5a0db45 100644 --- a/target/i386/tcg/sysemu/seg_helper.c +++ b/target/i386/tcg/sysemu/seg_helper.c @@ -26,7 +26,6 @@ #include "tcg/helper-tcg.h" #include "../seg_helper.h" -#ifdef TARGET_X86_64 void helper_syscall(CPUX86State *env, int next_eip_addend) { int selector; @@ -35,6 +34,7 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); } selector = (env->star >> 32) & 
0xffff; +#ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { int code64; @@ -61,7 +61,9 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) } else { env->eip = env->cstar; } - } else { + } else +#endif + { env->regs[R_ECX] = (uint32_t)(env->eip + next_eip_addend); env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK); @@ -78,7 +80,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) env->eip = (uint32_t)env->star; } } -#endif /* TARGET_X86_64 */ void handle_even_inj(CPUX86State *env, int intno, int is_int, int error_code, int is_hw, int rm) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 08c4cab73f..28cb3fb7f4 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -3924,13 +3924,33 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_cmpxchg8b(s, env, modrm); break; - case 7: /* RDSEED */ + case 7: /* RDSEED, RDPID with f3 prefix */ + if (mod != 3 || + (s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) { + goto illegal_op; + } + if (s->prefix & PREFIX_REPZ) { + if (!(s->cpuid_ext_features & CPUID_7_0_ECX_RDPID)) { + goto illegal_op; + } + gen_helper_rdpid(s->T0, cpu_env); + rm = (modrm & 7) | REX_B(s); + gen_op_mov_reg_v(s, dflag, rm, s->T0); + break; + } else { + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_RDSEED)) { + goto illegal_op; + } + goto do_rdrand; + } + case 6: /* RDRAND */ if (mod != 3 || (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) || !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) { goto illegal_op; } + do_rdrand: translator_io_start(&s->base); gen_helper_rdrand(s->T0, cpu_env); rm = (modrm & 7) | REX_B(s); @@ -5661,9 +5681,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) s->base.is_jmp = DISAS_NORETURN; break; case 0x134: /* sysenter */ - /* For Intel SYSENTER is valid on 64-bit */ - if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) + /* For AMD SYSENTER is not valid in long mode */ + if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { goto illegal_op; + } if (!PE(s)) { gen_exception_gpf(s); } else { @@ -5672,19 +5693,22 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } break; case 0x135: /* sysexit */ - /* For Intel SYSEXIT is valid on 64-bit */ - if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) + /* For AMD SYSEXIT is not valid in long mode */ + if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { goto illegal_op; - if (!PE(s)) { + } + if (!PE(s) || CPL(s) != 0) { gen_exception_gpf(s); } else { gen_helper_sysexit(cpu_env, tcg_constant_i32(dflag - 1)); s->base.is_jmp = DISAS_EOB_ONLY; } break; -#ifdef TARGET_X86_64 case 0x105: /* syscall */ - /* XXX: is it usable in real mode ? 
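The SYSCALL/SYSRET hunks below replace the compile-time TARGET_X86_64 gate with a vendor- and mode-dependent decode check. A hedged restatement of the predicate they implement, as a sketch using the LMA() macro and vendor field visible in the surrounding code:

/* Sketch: SYSCALL/SYSRET decode legally iff in long mode or on non-Intel. */
static bool syscall_sysret_ok_sketch(DisasContext *s, CPUX86State *env)
{
    return LMA(s) || env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1;
}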
*/ + /* For Intel SYSCALL is only valid in long mode */ + if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + goto illegal_op; + } gen_update_cc_op(s); gen_update_eip_cur(s); gen_helper_syscall(cpu_env, cur_insn_len_i32(s)); @@ -5694,7 +5718,11 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_eob_worker(s, false, true); break; case 0x107: /* sysret */ - if (!PE(s)) { + /* For Intel SYSRET is only valid in long mode */ + if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + goto illegal_op; + } + if (!PE(s) || CPL(s) != 0) { gen_exception_gpf(s); } else { gen_helper_sysret(cpu_env, tcg_constant_i32(dflag - 1)); @@ -5709,7 +5737,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_eob_worker(s, false, true); } break; -#endif case 0x1a2: /* cpuid */ gen_update_cc_op(s); gen_update_eip_cur(s); @@ -6108,7 +6135,9 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_update_cc_op(s); gen_update_eip_cur(s); translator_io_start(&s->base); - gen_helper_rdtscp(cpu_env); + gen_helper_rdtsc(cpu_env); + gen_helper_rdpid(s->T0, cpu_env); + gen_op_mov_reg_v(s, dflag, R_ECX, s->T0); break; default: @@ -6117,9 +6146,9 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) break; case 0x108: /* invd */ - case 0x109: /* wbinvd */ + case 0x109: /* wbinvd; wbnoinvd with REPZ prefix */ if (check_cpl0(s)) { - gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD); + gen_svm_check_intercept(s, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD); /* nothing to do */ } break; diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c index 67481b0aa8..c45f2ac2ba 100644 --- a/target/i386/tcg/user/seg_helper.c +++ b/target/i386/tcg/user/seg_helper.c @@ -26,7 +26,6 @@ #include "tcg/helper-tcg.h" #include "tcg/seg_helper.h" -#ifdef TARGET_X86_64 void helper_syscall(CPUX86State *env, int next_eip_addend) { CPUState *cs = env_cpu(env); @@ -36,7 +35,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) env->exception_next_eip = env->eip + next_eip_addend; cpu_loop_exit(cs); } -#endif /* TARGET_X86_64 */ /* * fake user mode interrupt. 
is_int is TRUE if coming from the int diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 0ee2adc105..4138a25801 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -672,6 +672,8 @@ enum { POWERPC_FLAG_TM = 0x00100000, /* Has SCV (ISA 3.00) */ POWERPC_FLAG_SCV = 0x00200000, + /* Has >1 thread per core */ + POWERPC_FLAG_SMT = 0x00400000, }; /* @@ -1268,6 +1270,13 @@ struct CPUArchState { uint64_t pmu_base_time; }; +#define _CORE_ID(cs) \ + (POWERPC_CPU(cs)->env.spr_cb[SPR_PIR].default_value & ~(cs->nr_threads - 1)) + +#define THREAD_SIBLING_FOREACH(cs, cs_sibling) \ + CPU_FOREACH(cs_sibling) \ + if (_CORE_ID(cs) == _CORE_ID(cs_sibling)) + #define SET_FIT_PERIOD(a_, b_, c_, d_) \ do { \ env->fit_period[0] = (a_); \ @@ -1647,6 +1656,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_HMER (0x150) #define SPR_HMEER (0x151) #define SPR_PCR (0x152) +#define SPR_HEIR (0x153) #define SPR_BOOKE_LPIDR (0x152) #define SPR_BOOKE_TCR (0x154) #define SPR_BOOKE_TLB0PS (0x158) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 7bce421a7c..aeff71d063 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -1630,6 +1630,7 @@ static void register_8xx_sprs(CPUPPCState *env) * HSRR0 => SPR 314 (Power 2.04 hypv) * HSRR1 => SPR 315 (Power 2.04 hypv) * LPIDR => SPR 317 (970) + * HEIR => SPR 339 (Power 2.05 hypv) (64-bit reg from 3.1) * EPR => SPR 702 (Power 2.04 emb) * perf => 768-783 (Power 2.04) * perf => 784-799 (Power 2.04) @@ -5523,6 +5524,24 @@ static void register_power6_common_sprs(CPUPPCState *env) 0x00000000); } +static void register_HEIR32_spr(CPUPPCState *env) +{ + spr_register_hv(env, SPR_HEIR, "HEIR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic32, + 0x00000000); +} + +static void register_HEIR64_spr(CPUPPCState *env) +{ + spr_register_hv(env, SPR_HEIR, "HEIR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); +} + static void register_power8_tce_address_control_sprs(CPUPPCState *env) { spr_register_kvm(env, SPR_TAR, "TAR", @@ -5951,6 +5970,7 @@ static void init_proc_POWER7(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR32_spr(env); register_power6_dbg_sprs(env); register_power7_book4_sprs(env); @@ -6073,6 +6093,7 @@ static void init_proc_POWER8(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR32_spr(env); register_power6_dbg_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); @@ -6235,6 +6256,7 @@ static void init_proc_POWER9(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR32_spr(env); register_power6_dbg_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); @@ -6427,6 +6449,7 @@ static void init_proc_POWER10(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR64_spr(env); register_power6_dbg_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); @@ -6732,6 +6755,7 @@ static void ppc_cpu_realize(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); PowerPCCPU *cpu = POWERPC_CPU(dev); + CPUPPCState *env = &cpu->env; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); Error *local_err = NULL; @@ 
-6763,6 +6787,10 @@ static void ppc_cpu_realize(DeviceState *dev, Error **errp) pcc->parent_realize(dev, errp); + if (env_cpu(env)->nr_threads > 1) { + env->flags |= POWERPC_FLAG_SMT; + } + return; unrealize: diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 12d8a7257b..2158390e27 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -28,6 +28,7 @@ #include "trace.h" #ifdef CONFIG_TCG +#include "sysemu/tcg.h" #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #endif @@ -133,6 +134,26 @@ static void dump_hcall(CPUPPCState *env) env->nip); } +#ifdef CONFIG_TCG +/* Return true iff byteswap is needed to load instruction */ +static inline bool insn_need_byteswap(CPUArchState *env) +{ + /* SYSTEM builds TARGET_BIG_ENDIAN. Need to swap when MSR[LE] is set */ + return !!(env->msr & ((target_ulong)1 << MSR_LE)); +} + +static uint32_t ppc_ldl_code(CPUArchState *env, abi_ptr addr) +{ + uint32_t insn = cpu_ldl_code(env, addr); + + if (insn_need_byteswap(env)) { + insn = bswap32(insn); + } + + return insn; +} +#endif + static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp) { const char *es; @@ -1328,6 +1349,72 @@ static bool books_vhyp_handles_hv_excp(PowerPCCPU *cpu) return false; } +#ifdef CONFIG_TCG +static bool is_prefix_insn(CPUPPCState *env, uint32_t insn) +{ + if (!(env->insns_flags2 & PPC2_ISA310)) { + return false; + } + return ((insn & 0xfc000000) == 0x04000000); +} + +static bool is_prefix_insn_excp(PowerPCCPU *cpu, int excp) +{ + CPUPPCState *env = &cpu->env; + + if (!tcg_enabled()) { + /* + * This does not load instructions and set the prefix bit correctly + * for injected interrupts with KVM. That may have to be discovered + * and set by the KVM layer before injecting. + */ + return false; + } + + switch (excp) { + case POWERPC_EXCP_HDSI: + /* HDSI PRTABLE_FAULT has the originating access type in error_code */ + if ((env->spr[SPR_HDSISR] & DSISR_PRTABLE_FAULT) && + (env->error_code == MMU_INST_FETCH)) { + /* + * Fetch failed due to partition scope translation, so prefix + * indication is not relevant (and attempting to load the + * instruction at NIP would cause recursive faults with the same + * translation). 
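+ * (For the cases below that do fetch the instruction, is_prefix_insn()
+ * tests primary opcode 1: (insn & 0xfc000000) == 0x04000000 is the
+ * same check as extract32(insn, 26, 6) == 1.)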
+ */ + break; + } + /* fall through */ + case POWERPC_EXCP_MCHECK: + case POWERPC_EXCP_DSI: + case POWERPC_EXCP_DSEG: + case POWERPC_EXCP_ALIGN: + case POWERPC_EXCP_PROGRAM: + case POWERPC_EXCP_FPU: + case POWERPC_EXCP_TRACE: + case POWERPC_EXCP_HV_EMU: + case POWERPC_EXCP_VPU: + case POWERPC_EXCP_VSXU: + case POWERPC_EXCP_FU: + case POWERPC_EXCP_HV_FU: { + uint32_t insn = ppc_ldl_code(env, env->nip); + if (is_prefix_insn(env, insn)) { + return true; + } + break; + } + default: + break; + } + return false; +} +#else +static bool is_prefix_insn_excp(PowerPCCPU *cpu, int excp) +{ + return false; +} +#endif + static void powerpc_excp_books(PowerPCCPU *cpu, int excp) { CPUState *cs = CPU(cpu); @@ -1375,6 +1462,10 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) vector |= env->excp_prefix; + if (is_prefix_insn_excp(cpu, excp)) { + msr |= PPC_BIT(34); + } + switch (excp) { case POWERPC_EXCP_MCHECK: /* Machine check exception */ if (!FIELD_EX64(env->msr, MSR, ME)) { @@ -1500,6 +1591,10 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) vhc->hypercall(cpu->vhyp, cpu); return; } + if (env->insns_flags2 & PPC2_ISA310) { + /* ISAv3.1 puts LEV into SRR1 */ + msr |= lev << 20; + } if (lev == 1) { new_msr |= (target_ulong)MSR_HVB; } @@ -1551,13 +1646,28 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) case POWERPC_EXCP_HDECR: /* Hypervisor decrementer exception */ case POWERPC_EXCP_HDSI: /* Hypervisor data storage exception */ case POWERPC_EXCP_SDOOR_HV: /* Hypervisor Doorbell interrupt */ - case POWERPC_EXCP_HV_EMU: case POWERPC_EXCP_HVIRT: /* Hypervisor virtualization */ srr0 = SPR_HSRR0; srr1 = SPR_HSRR1; new_msr |= (target_ulong)MSR_HVB; new_msr |= env->msr & ((target_ulong)1 << MSR_RI); break; +#ifdef CONFIG_TCG + case POWERPC_EXCP_HV_EMU: { + uint32_t insn = ppc_ldl_code(env, env->nip); + env->spr[SPR_HEIR] = insn; + if (is_prefix_insn(env, insn)) { + uint32_t insn2 = ppc_ldl_code(env, env->nip + 4); + env->spr[SPR_HEIR] <<= 32; + env->spr[SPR_HEIR] |= insn2; + } + srr0 = SPR_HSRR0; + srr1 = SPR_HSRR1; + new_msr |= (target_ulong)MSR_HVB; + new_msr |= env->msr & ((target_ulong)1 << MSR_RI); + break; + } +#endif case POWERPC_EXCP_VPU: /* Vector unavailable exception */ case POWERPC_EXCP_VSXU: /* VSX unavailable exception */ case POWERPC_EXCP_FU: /* Facility unavailable exception */ @@ -3076,22 +3186,42 @@ void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb) } /* - * sends a message to other threads that are on the same + * sends a message to another thread on the same * multi-threaded processor */ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) { - int pir = env->spr_cb[SPR_PIR].default_value; + CPUState *cs = env_cpu(env); + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUState *ccs; + uint32_t nr_threads = cs->nr_threads; + int ttir = rb & PPC_BITMASK(57, 63); helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP); - if (!dbell_type_server(rb)) { + if (!dbell_type_server(rb) || ttir >= nr_threads) { return; } - /* TODO: TCG supports only one thread */ + if (nr_threads == 1) { + ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, 1); + return; + } + + /* Does iothread need to be locked for walking CPU list? 
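 * (The walk uses THREAD_SIBLING_FOREACH from cpu.h: with, say, 4
 * threads per core and contiguous PIR values, _CORE_ID is PIR & ~3,
 * so PIRs 4..7 all match as siblings of one core.  This assumes
 * nr_threads is a power of two.)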
*/ + qemu_mutex_lock_iothread(); + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + uint32_t thread_id = ppc_cpu_tir(ccpu); + + if (ttir == thread_id) { + ppc_set_irq(ccpu, PPC_INTERRUPT_DOORBELL, 1); + qemu_mutex_unlock_iothread(); + return; + } + } - book3s_msgsnd_common(pir, PPC_INTERRUPT_DOORBELL); + g_assert_not_reached(); } #endif /* TARGET_PPC64 */ @@ -3104,7 +3234,7 @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, /* Restore state and reload the insn we executed, for filling in DSISR. */ cpu_restore_state(cs, retaddr); - insn = cpu_ldl_code(env, env->nip); + insn = ppc_ldl_code(env, env->nip); switch (env->mmu_model) { case POWERPC_MMU_SOFT_4xx: diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c index 63c9abe4f1..ca39efdc35 100644 --- a/target/ppc/gdbstub.c +++ b/target/ppc/gdbstub.c @@ -327,6 +327,25 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu) unsigned int num_regs = 0; int i; + for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) { + ppc_spr_t *spr = &env->spr_cb[i]; + + if (!spr->name) { + continue; + } + + /* + * GDB identifies registers based on the order they are + * presented in the XML. These ids will not match QEMU's + * representation (which follows the PowerISA). + * + * Store the position of the current register description so + * we can make the correspondence later. + */ + spr->gdb_id = num_regs; + num_regs++; + } + if (pcc->gdb_spr_xml) { return; } @@ -348,17 +367,6 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu) g_string_append_printf(xml, " bitsize=\"%d\"", TARGET_LONG_BITS); g_string_append(xml, " group=\"spr\"/>"); - - /* - * GDB identifies registers based on the order they are - * presented in the XML. These ids will not match QEMU's - * representation (which follows the PowerISA). - * - * Store the position of the current register description so - * we can make the correspondence later. 
- */ - spr->gdb_id = num_regs; - num_regs++; } g_string_append(xml, "</feature>"); diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 38efbc351c..fda40b8a60 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -704,6 +704,8 @@ DEF_HELPER_3(store_dcr, void, env, tl, tl) DEF_HELPER_2(load_dump_spr, void, env, i32) DEF_HELPER_2(store_dump_spr, void, env, i32) +DEF_HELPER_3(spr_write_CTRL, void, env, i32, tl) + DEF_HELPER_4(fscr_facility_check, void, env, i32, i32, i32) DEF_HELPER_4(msr_facility_check, void, env, i32, i32, i32) DEF_HELPER_FLAGS_1(load_tbl, TCG_CALL_NO_RWG, tl, env) diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index a7f2de9d10..a8a935e267 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1728,6 +1728,10 @@ int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) .addr = (uintptr_t) &bits, }; + if (!kvm_enabled()) { + return 0; + } + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); } @@ -1741,6 +1745,10 @@ int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) .addr = (uintptr_t) &bits, }; + if (!kvm_enabled()) { + return 0; + } + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); } @@ -1755,6 +1763,10 @@ int kvmppc_set_tcr(PowerPCCPU *cpu) .addr = (uintptr_t) &tcr, }; + if (!kvm_enabled()) { + return 0; + } + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 40ddc5c08c..1f1af21f33 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -43,6 +43,31 @@ void helper_store_dump_spr(CPUPPCState *env, uint32_t sprn) env->spr[sprn]); } +void helper_spr_write_CTRL(CPUPPCState *env, uint32_t sprn, + target_ulong val) +{ + CPUState *cs = env_cpu(env); + CPUState *ccs; + uint32_t run = val & 1; + uint32_t ts, ts_mask; + + assert(sprn == SPR_CTRL); + + env->spr[sprn] &= ~1U; + env->spr[sprn] |= run; + + ts_mask = ~(1U << (8 + env->spr[SPR_TIR])); + ts = run << (8 + env->spr[SPR_TIR]); + + THREAD_SIBLING_FOREACH(cs, ccs) { + CPUPPCState *cenv = &POWERPC_CPU(ccs)->env; + + cenv->spr[sprn] &= ts_mask; + cenv->spr[sprn] |= ts; + } +} + + #ifdef TARGET_PPC64 static void raise_hv_fu_exception(CPUPPCState *env, uint32_t bit, const char *caller, uint32_t cause, @@ -159,14 +184,31 @@ void helper_store_pcr(CPUPPCState *env, target_ulong value) */ target_ulong helper_load_dpdes(CPUPPCState *env) { + CPUState *cs = env_cpu(env); + CPUState *ccs; + uint32_t nr_threads = cs->nr_threads; target_ulong dpdes = 0; helper_hfscr_facility_check(env, HFSCR_MSGP, "load DPDES", HFSCR_IC_MSGP); - /* TODO: TCG supports only one thread */ - if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { - dpdes = 1; + if (nr_threads == 1) { + if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { + dpdes = 1; + } + return dpdes; + } + + qemu_mutex_lock_iothread(); + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + CPUPPCState *cenv = &ccpu->env; + uint32_t thread_id = ppc_cpu_tir(ccpu); + + if (cenv->pending_interrupts & PPC_INTERRUPT_DOORBELL) { + dpdes |= (0x1 << thread_id); + } } + qemu_mutex_unlock_iothread(); return dpdes; } @@ -174,17 +216,32 @@ target_ulong helper_load_dpdes(CPUPPCState *env) void helper_store_dpdes(CPUPPCState *env, target_ulong val) { PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + CPUState *ccs; + uint32_t nr_threads = cs->nr_threads; helper_hfscr_facility_check(env, HFSCR_MSGP, "store DPDES", HFSCR_IC_MSGP); - /* TODO: TCG supports only one thread */ - if (val & ~0x1) { + if (val & ~(nr_threads - 1)) { qemu_log_mask(LOG_GUEST_ERROR, 
"Invalid DPDES register value " TARGET_FMT_lx"\n", val); + val &= (nr_threads - 1); /* Ignore the invalid bits */ + } + + if (nr_threads == 1) { + ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & 0x1); return; } - ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & 0x1); + /* Does iothread need to be locked for walking CPU list? */ + qemu_mutex_lock_iothread(); + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + uint32_t thread_id = ppc_cpu_tir(ccpu); + + ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & (0x1 << thread_id)); + } + qemu_mutex_unlock_iothread(); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 031efda0df..920084bd8f 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -145,6 +145,13 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; + env->error_code = 0; + if (cause & DSISR_PRTABLE_FAULT) { + /* HDSI PRTABLE_FAULT gets the originating access type in error_code */ + env->error_code = access_type; + access_type = MMU_DATA_LOAD; + } + qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx" 0x%" HWADDR_PRIx" cause %08x\n", __func__, access_str(access_type), @@ -166,7 +173,6 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, env->spr[SPR_HDSISR] = cause; env->spr[SPR_HDAR] = eaddr; env->spr[SPR_ASDR] = g_raddr; - env->error_code = 0; break; default: g_assert_not_reached(); @@ -369,17 +375,26 @@ static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate) } static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, - MMUAccessType access_type, + MMUAccessType orig_access_type, vaddr eaddr, hwaddr g_raddr, ppc_v3_pate_t pate, hwaddr *h_raddr, int *h_prot, int *h_page_size, bool pde_addr, int mmu_idx, bool guest_visible) { + MMUAccessType access_type = orig_access_type; int fault_cause = 0; hwaddr pte_addr; uint64_t pte; + if (pde_addr) { + /* + * Translation of process-scoped tables/directories is performed as + * a read-access. + */ + access_type = MMU_DATA_LOAD; + } + qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx " mmu_idx %u 0x%"HWADDR_PRIx"\n", __func__, access_str(access_type), @@ -396,7 +411,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, fault_cause |= DSISR_PRTABLE_FAULT; } if (guest_visible) { - ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr, fault_cause); + ppc_radix64_raise_hsi(cpu, orig_access_type, + eaddr, g_raddr, fault_cause); } return 1; } @@ -477,10 +493,10 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, * is only used to translate the effective addresses of the * process table entries. 
*/ - ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, - pate, &h_raddr, &h_prot, - &h_page_size, true, - /* mmu_idx is 5 because we're translating from hypervisor scope */ + /* mmu_idx is 5 because we're translating from hypervisor scope */ + ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, + prtbe_addr, pate, &h_raddr, + &h_prot, &h_page_size, true, 5, guest_visible); if (ret) { return ret; @@ -519,11 +535,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, * translation */ do { - ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, - pate, &h_raddr, &h_prot, - &h_page_size, true, /* mmu_idx is 5 because we're translating from hypervisor scope */ - 5, guest_visible); + ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, + pte_addr, pate, &h_raddr, + &h_prot, &h_page_size, + true, 5, guest_visible); if (ret) { return ret; } diff --git a/target/ppc/translate.c b/target/ppc/translate.c index b591f2e496..372ee600b2 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -234,6 +234,28 @@ struct opc_handler_t { void (*handler)(DisasContext *ctx); }; +static inline bool gen_serialize(DisasContext *ctx) +{ + if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; + return false; + } + return true; +} + +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +static inline bool gen_serialize_core(DisasContext *ctx) +{ + if (ctx->flags & POWERPC_FLAG_SMT) { + return gen_serialize(ctx); + } + + return true; +} +#endif + /* SPR load/store helpers */ static inline void gen_load_spr(TCGv t, int reg) { @@ -416,9 +438,32 @@ void spr_write_generic32(DisasContext *ctx, int sprn, int gprn) #endif } +static void spr_write_CTRL_ST(DisasContext *ctx, int sprn, int gprn) +{ + /* This does not implement >1 thread */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + tcg_gen_extract_tl(t0, cpu_gpr[gprn], 0, 1); /* Extract RUN field */ + tcg_gen_shli_tl(t1, t0, 8); /* Duplicate the bit in TS */ + tcg_gen_or_tl(t1, t1, t0); + gen_store_spr(sprn, t1); +} + void spr_write_CTRL(DisasContext *ctx, int sprn, int gprn) { - spr_write_generic32(ctx, sprn, gprn); + if (!(ctx->flags & POWERPC_FLAG_SMT)) { + spr_write_CTRL_ST(ctx, sprn, gprn); + goto out; + } + + if (!gen_serialize(ctx)) { + return; + } + + gen_helper_spr_write_CTRL(cpu_env, tcg_constant_i32(sprn), + cpu_gpr[gprn]); +out: + spr_store_dump_spr(sprn); /* * SPR_CTRL writes must force a new translation block, @@ -770,11 +815,19 @@ void spr_write_pcr(DisasContext *ctx, int sprn, int gprn) /* DPDES */ void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn) { + if (!gen_serialize_core(ctx)) { + return; + } + gen_helper_load_dpdes(cpu_gpr[gprn], cpu_env); } void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn) { + if (!gen_serialize_core(ctx)) { + return; + } + gen_helper_store_dpdes(cpu_env, cpu_gpr[gprn]); } #endif @@ -4422,7 +4475,12 @@ static void gen_sc(DisasContext *ctx) { uint32_t lev; - lev = (ctx->opcode >> 5) & 0x7F; + /* + * LEV is a 7-bit field, but the top 6 bits are treated as a reserved + * field (i.e., ignored). ISA v3.1 changes that to 5 bits, but that is + * for Ultravisor which TCG does not support, so just ignore the top 6. 
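+ * Worked example: plain "sc" encodes LEV=0 and takes the normal
+ * system-call path; "sc 1" encodes LEV=1, which powerpc_excp_books()
+ * above treats as a hypercall (MSR_HVB is set in the new MSR).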
+ */ + lev = (ctx->opcode >> 5) & 0x1; gen_exception_err(ctx, POWERPC_SYSCALL, lev); } diff --git a/tests/avocado/ppc_pseries.py b/tests/avocado/ppc_pseries.py index d8b04dc3ea..a8311e6555 100644 --- a/tests/avocado/ppc_pseries.py +++ b/tests/avocado/ppc_pseries.py @@ -14,12 +14,9 @@ class pseriesMachine(QemuSystemTest): timeout = 90 KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' panic_message = 'Kernel panic - not syncing' + good_message = 'VFS: Cannot open root device' - def test_ppc64_pseries(self): - """ - :avocado: tags=arch:ppc64 - :avocado: tags=machine:pseries - """ + def do_test_ppc64_linux_boot(self): kernel_url = ('https://archives.fedoraproject.org/pub/archive' '/fedora-secondary/releases/29/Everything/ppc64le/os' '/ppc/ppc64/vmlinuz') @@ -31,5 +28,69 @@ class pseriesMachine(QemuSystemTest): self.vm.add_args('-kernel', kernel_path, '-append', kernel_command_line) self.vm.launch() - console_pattern = 'Kernel command line: %s' % kernel_command_line + + def test_ppc64_vof_linux_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-machine', 'x-vof=on') + self.do_test_ppc64_linux_boot() + console_pattern = 'VFS: Cannot open root device' + wait_for_console_pattern(self, console_pattern, self.panic_message) + + def test_ppc64_linux_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.do_test_ppc64_linux_boot() + console_pattern = 'VFS: Cannot open root device' + wait_for_console_pattern(self, console_pattern, self.panic_message) + + def test_ppc64_linux_smp_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-smp', '4') + self.do_test_ppc64_linux_boot() + console_pattern = 'smp: Brought up 1 node, 4 CPUs' + wait_for_console_pattern(self, console_pattern, self.panic_message) + wait_for_console_pattern(self, self.good_message, self.panic_message) + + def test_ppc64_linux_smt_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-smp', '4,threads=4') + self.do_test_ppc64_linux_boot() + console_pattern = 'CPU maps initialized for 4 threads per core' + wait_for_console_pattern(self, console_pattern, self.panic_message) + console_pattern = 'smp: Brought up 1 node, 4 CPUs' + wait_for_console_pattern(self, console_pattern, self.panic_message) + wait_for_console_pattern(self, self.good_message, self.panic_message) + + def test_ppc64_linux_big_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-smp', '16,threads=4,cores=2,sockets=2') + self.vm.add_args('-m', '512M', + '-object', 'memory-backend-ram,size=256M,id=m0', + '-object', 'memory-backend-ram,size=256M,id=m1') + self.vm.add_args('-numa', 'node,nodeid=0,memdev=m0') + self.vm.add_args('-numa', 'node,nodeid=1,memdev=m1') + self.do_test_ppc64_linux_boot() + console_pattern = 'CPU maps initialized for 4 threads per core' + wait_for_console_pattern(self, console_pattern, self.panic_message) + console_pattern = 'smp: Brought up 2 nodes, 16 CPUs' wait_for_console_pattern(self, console_pattern, self.panic_message) + wait_for_console_pattern(self, self.good_message, self.panic_message) diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c index edcad4a0ce..a600331843 100644 --- a/tests/qtest/cxl-test.c +++ b/tests/qtest/cxl-test.c @@ -124,6 +124,7 @@ static void cxl_t3d_deprecated(void) qtest_start(cmdline->str); qtest_end(); + rmdir(tmpfs); } static void cxl_t3d_persistent(void) @@ 
-138,6 +139,7 @@ static void cxl_t3d_persistent(void) qtest_start(cmdline->str); qtest_end(); + rmdir(tmpfs); } static void cxl_t3d_volatile(void) diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index e4f95b2858..dfb8003597 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -281,7 +281,7 @@ static void read_guest_mem_server(QTestState *qts, TestServer *s) /* iterate all regions */ for (i = 0; i < s->fds_num; i++) { - /* We'll check only the region statring at 0x0*/ + /* We'll check only the region starting at 0x0 */ if (s->memory.regions[i].guest_phys_addr != 0x0) { continue; } |