diff options
462 files changed, 10605 insertions, 2629 deletions
diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml index 2c7980a4f6..dc6d67aacf 100644 --- a/.gitlab-ci.d/buildtest-template.yml +++ b/.gitlab-ci.d/buildtest-template.yml @@ -26,7 +26,7 @@ make -j"$JOBS" $MAKE_CHECK_ARGS ; fi -.native_test_job_template: +.common_test_job_template: stage: test image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest script: @@ -37,8 +37,16 @@ # Avoid recompiling by hiding ninja with NINJA=":" - make NINJA=":" $MAKE_CHECK_ARGS +.native_test_job_template: + extends: .common_test_job_template + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + expire_in: 7 days + paths: + - build/meson-logs/testlog.txt + .avocado_test_job_template: - extends: .native_test_job_template + extends: .common_test_job_template cache: key: "${CI_JOB_NAME}-cache" paths: diff --git a/MAINTAINERS b/MAINTAINERS index 571556d279..dff0200f70 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2545,6 +2545,13 @@ F: qapi/block*.json F: qapi/transaction.json T: git https://repo.or.cz/qemu/armbru.git block-next +Compute Express Link +M: Ben Widawsky <ben.widawsky@intel.com> +M: Jonathan Cameron <jonathan.cameron@huawei.com> +S: Supported +F: hw/cxl/ +F: include/hw/cxl/ + Dirty Bitmaps M: Eric Blake <eblake@redhat.com> M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> @@ -3367,6 +3374,7 @@ F: qemu-nbd.* F: blockdev-nbd.c F: docs/interop/nbd.txt F: docs/tools/qemu-nbd.rst +F: tests/qemu-iotests/tests/*nbd* T: git https://repo.or.cz/qemu/ericb.git nbd T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd @@ -165,10 +165,7 @@ ifneq ($(filter $(ninja-targets), $(ninja-cmd-goals)),) endif endif -# Force configure to re-run if the API symbols are updated ifeq ($(CONFIG_PLUGIN),y) -config-host.mak: $(SRC_PATH)/plugins/qemu-plugins.symbols - .PHONY: plugins plugins: $(call quiet-command,\ diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h index d884aa2aaa..1b6fd9c607 100644 --- a/accel/tcg/tcg-accel-ops-icount.h +++ b/accel/tcg/tcg-accel-ops-icount.h @@ -7,8 +7,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef TCG_CPUS_ICOUNT_H -#define TCG_CPUS_ICOUNT_H +#ifndef TCG_ACCEL_OPS_ICOUNT_H +#define TCG_ACCEL_OPS_ICOUNT_H void icount_handle_deadline(void); void icount_prepare_for_run(CPUState *cpu); @@ -16,4 +16,4 @@ void icount_process_data(CPUState *cpu); void icount_handle_interrupt(CPUState *cpu, int mask); -#endif /* TCG_CPUS_ICOUNT_H */ +#endif /* TCG_ACCEL_OPS_ICOUNT_H */ diff --git a/accel/tcg/tcg-accel-ops-mttcg.h b/accel/tcg/tcg-accel-ops-mttcg.h index 9fdc5a2ab5..8ffa7a9a9f 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.h +++ b/accel/tcg/tcg-accel-ops-mttcg.h @@ -7,8 +7,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef TCG_CPUS_MTTCG_H -#define TCG_CPUS_MTTCG_H +#ifndef TCG_ACCEL_OPS_MTTCG_H +#define TCG_ACCEL_OPS_MTTCG_H /* kick MTTCG vCPU thread */ void mttcg_kick_vcpu_thread(CPUState *cpu); @@ -16,4 +16,4 @@ void mttcg_kick_vcpu_thread(CPUState *cpu); /* start an mttcg vCPU thread */ void mttcg_start_vcpu_thread(CPUState *cpu); -#endif /* TCG_CPUS_MTTCG_H */ +#endif /* TCG_ACCEL_OPS_MTTCG_H */ diff --git a/accel/tcg/tcg-accel-ops-rr.h b/accel/tcg/tcg-accel-ops-rr.h index 54f6ae6e86..2a76a29612 100644 --- a/accel/tcg/tcg-accel-ops-rr.h +++ b/accel/tcg/tcg-accel-ops-rr.h @@ -7,8 +7,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef TCG_CPUS_RR_H -#define TCG_CPUS_RR_H +#ifndef TCG_ACCEL_OPS_RR_H +#define TCG_ACCEL_OPS_RR_H #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) @@ -18,4 +18,4 @@ void rr_kick_vcpu_thread(CPUState *unused); /* start the round robin vcpu thread */ void rr_start_vcpu_thread(CPUState *cpu); -#endif /* TCG_CPUS_RR_H */ +#endif /* TCG_ACCEL_OPS_RR_H */ diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h index 6a5fcef889..f9bc6330e2 100644 --- a/accel/tcg/tcg-accel-ops.h +++ b/accel/tcg/tcg-accel-ops.h @@ -9,8 +9,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef TCG_CPUS_H -#define TCG_CPUS_H +#ifndef TCG_ACCEL_OPS_H +#define TCG_ACCEL_OPS_H #include "sysemu/cpus.h" @@ -19,4 +19,4 @@ int tcg_cpus_exec(CPUState *cpu); void tcg_handle_interrupt(CPUState *cpu, int mask); void tcg_cpu_init_cflags(CPUState *cpu, bool parallel); -#endif /* TCG_CPUS_H */ +#endif /* TCG_ACCEL_OPS_H */ diff --git a/audio/audio.c b/audio/audio.c index 9e91a5a4f2..a02f3ce5c6 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -2099,13 +2099,19 @@ static void audio_validate_opts(Audiodev *dev, Error **errp) void audio_parse_option(const char *opt) { - AudiodevListEntry *e; Audiodev *dev = NULL; Visitor *v = qobject_input_visitor_new_str(opt, "driver", &error_fatal); visit_type_Audiodev(v, NULL, &dev, &error_fatal); visit_free(v); + audio_define(dev); +} + +void audio_define(Audiodev *dev) +{ + AudiodevListEntry *e; + audio_validate_opts(dev, &error_fatal); e = g_new0(AudiodevListEntry, 1); diff --git a/audio/audio.h b/audio/audio.h index 3d5ecdecd5..b5e17cd218 100644 --- a/audio/audio.h +++ b/audio/audio.h @@ -168,6 +168,7 @@ void audio_sample_to_uint64(const void *samples, int pos, void audio_sample_from_uint64(void *samples, int pos, uint64_t left, uint64_t right); +void audio_define(Audiodev *audio); void audio_parse_option(const char *opt); void audio_init_audiodevs(void); void audio_legacy_help(void); diff --git a/backends/rng.c b/backends/rng.c index 3757b04485..6c7bf64426 100644 --- a/backends/rng.c +++ b/backends/rng.c @@ -48,24 +48,10 @@ static bool rng_backend_prop_get_opened(Object *obj, Error **errp) static void rng_backend_complete(UserCreatable *uc, Error **errp) { - object_property_set_bool(OBJECT(uc), "opened", true, errp); -} - -static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp) -{ - RngBackend *s = RNG_BACKEND(obj); + RngBackend *s = RNG_BACKEND(uc); RngBackendClass *k = RNG_BACKEND_GET_CLASS(s); Error *local_err = NULL; - if (value == s->opened) { - return; - } - - if (!value && s->opened) { - error_setg(errp, QERR_PERMISSION_DENIED); - return; - } - if (k->opened) { k->opened(s, &local_err); if (local_err) { @@ -122,7 +108,7 @@ static void rng_backend_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "opened", rng_backend_prop_get_opened, - rng_backend_prop_set_opened); + NULL); } static const TypeInfo rng_backend_info = { diff --git a/block/copy-on-read.h b/block/copy-on-read.h index 7bf405dccd..1d8ad38c74 100644 --- a/block/copy-on-read.h +++ b/block/copy-on-read.h @@ -22,11 +22,11 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef BLOCK_COPY_ON_READ -#define BLOCK_COPY_ON_READ +#ifndef BLOCK_COPY_ON_READ_H +#define BLOCK_COPY_ON_READ_H #include "block/block_int.h" void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs); -#endif /* BLOCK_COPY_ON_READ */ +#endif /* BLOCK_COPY_ON_READ_H */ diff --git a/block/coroutines.h b/block/coroutines.h index 8ea70d45f9..830ecaa733 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef BLOCK_COROUTINES_INT_H -#define BLOCK_COROUTINES_INT_H +#ifndef BLOCK_COROUTINES_H +#define BLOCK_COROUTINES_H #include "block/block_int.h" @@ -129,4 +129,4 @@ blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); int generated_co_wrapper blk_do_flush(BlockBackend *blk); -#endif /* BLOCK_COROUTINES_INT_H */ +#endif /* BLOCK_COROUTINES_H */ diff --git a/block/io.c b/block/io.c index 9769ec53b0..789e6373d5 100644 --- a/block/io.c +++ b/block/io.c @@ -751,7 +751,7 @@ void bdrv_drain_all(void) * * This function should be called when a tracked request is completing. */ -static void tracked_request_end(BdrvTrackedRequest *req) +static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req) { if (req->serialising) { qatomic_dec(&req->bs->serialising_in_flight); diff --git a/blockdev-nbd.c b/blockdev-nbd.c index 7f6531cba0..012256bb02 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -30,18 +30,23 @@ typedef struct NBDServerData { } NBDServerData; static NBDServerData *nbd_server; -static bool is_qemu_nbd; +static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */ static void nbd_update_server_watch(NBDServerData *s); -void nbd_server_is_qemu_nbd(bool value) +void nbd_server_is_qemu_nbd(int max_connections) { - is_qemu_nbd = value; + qemu_nbd_connections = max_connections; } bool nbd_server_is_running(void) { - return nbd_server || is_qemu_nbd; + return nbd_server || qemu_nbd_connections >= 0; +} + +int nbd_server_max_connections(void) +{ + return nbd_server ? nbd_server->max_connections : qemu_nbd_connections; } static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) diff --git a/bsd-user/arm/target.h b/bsd-user/arm/target.h index 419c039b68..7c423ec575 100644 --- a/bsd-user/arm/target.h +++ b/bsd-user/arm/target.h @@ -17,5 +17,5 @@ static inline bool regpairs_aligned(void *cpu_env) return true; } -#endif /* ! TARGET_H */ +#endif /* TARGET_H */ diff --git a/bsd-user/arm/target_arch.h b/bsd-user/arm/target_arch.h index 93cfaea098..561934bbd2 100644 --- a/bsd-user/arm/target_arch.h +++ b/bsd-user/arm/target_arch.h @@ -17,12 +17,12 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_H_ -#define _TARGET_ARCH_H_ +#ifndef TARGET_ARCH_H +#define TARGET_ARCH_H #include "qemu.h" void target_cpu_set_tls(CPUARMState *env, target_ulong newtls); target_ulong target_cpu_get_tls(CPUARMState *env); -#endif /* !_TARGET_ARCH_H_ */ +#endif /* TARGET_ARCH_H */ diff --git a/bsd-user/arm/target_arch_cpu.h b/bsd-user/arm/target_arch_cpu.h index afb7814a8d..517d008764 100644 --- a/bsd-user/arm/target_arch_cpu.h +++ b/bsd-user/arm/target_arch_cpu.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_CPU_H_ -#define _TARGET_ARCH_CPU_H_ +#ifndef TARGET_ARCH_CPU_H +#define TARGET_ARCH_CPU_H #include "target_arch.h" #include "signal-common.h" @@ -210,4 +210,4 @@ static inline void target_cpu_reset(CPUArchState *env) { } -#endif /* !_TARGET_ARCH_CPU_H */ +#endif /* TARGET_ARCH_CPU_H */ diff --git a/bsd-user/arm/target_arch_elf.h b/bsd-user/arm/target_arch_elf.h index 4a0215d02e..935bce347f 100644 --- a/bsd-user/arm/target_arch_elf.h +++ b/bsd-user/arm/target_arch_elf.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_ELF_H_ -#define _TARGET_ARCH_ELF_H_ + +#ifndef TARGET_ARCH_ELF_H +#define TARGET_ARCH_ELF_H #define ELF_START_MMAP 0x80000000 #define ELF_ET_DYN_LOAD_ADDR 0x500000 @@ -125,4 +126,4 @@ static uint32_t get_elf_hwcap2(void) #undef GET_FEATURE #undef GET_FEATURE_ID -#endif /* _TARGET_ARCH_ELF_H_ */ +#endif /* TARGET_ARCH_ELF_H */ diff --git a/bsd-user/arm/target_arch_reg.h b/bsd-user/arm/target_arch_reg.h index ef5ed5154f..070fa24da1 100644 --- a/bsd-user/arm/target_arch_reg.h +++ b/bsd-user/arm/target_arch_reg.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_REG_H_ -#define _TARGET_ARCH_REG_H_ +#ifndef TARGET_ARCH_REG_H +#define TARGET_ARCH_REG_H /* See sys/arm/include/reg.h */ typedef struct target_reg { @@ -57,4 +57,4 @@ static inline void target_copy_regs(target_reg_t *regs, const CPUARMState *env) #undef tswapreg -#endif /* !_TARGET_ARCH_REG_H_ */ +#endif /* TARGET_ARCH_REG_H */ diff --git a/bsd-user/arm/target_arch_signal.h b/bsd-user/arm/target_arch_signal.h index f1844dbf22..02b2b33e07 100644 --- a/bsd-user/arm/target_arch_signal.h +++ b/bsd-user/arm/target_arch_signal.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_SIGNAL_H_ -#define _TARGET_ARCH_SIGNAL_H_ + +#ifndef TARGET_ARCH_SIGNAL_H +#define TARGET_ARCH_SIGNAL_H #include "cpu.h" @@ -85,4 +86,4 @@ struct target_sigframe { target_mcontext_vfp_t sf_vfp; /* actual saved VFP context */ }; -#endif /* !_TARGET_ARCH_SIGNAL_H_ */ +#endif /* TARGET_ARCH_SIGNAL_H */ diff --git a/bsd-user/arm/target_arch_sigtramp.h b/bsd-user/arm/target_arch_sigtramp.h index 5d434a9e7e..06198045ed 100644 --- a/bsd-user/arm/target_arch_sigtramp.h +++ b/bsd-user/arm/target_arch_sigtramp.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_SIGTRAMP_H_ -#define _TARGET_ARCH_SIGTRAMP_H_ +#ifndef TARGET_ARCH_SIGTRAMP_H +#define TARGET_ARCH_SIGTRAMP_H /* Compare to arm/arm/locore.S ENTRY_NP(sigcode) */ static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, @@ -46,4 +46,4 @@ static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, return memcpy_to_target(offset, sigtramp_code, TARGET_SZSIGCODE); } -#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ +#endif /* TARGET_ARCH_SIGTRAMP_H */ diff --git a/bsd-user/arm/target_arch_sysarch.h b/bsd-user/arm/target_arch_sysarch.h index 8cc6bff207..5cb7864197 100644 --- a/bsd-user/arm/target_arch_sysarch.h +++ b/bsd-user/arm/target_arch_sysarch.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_SYSARCH_H_ -#define _TARGET_ARCH_SYSARCH_H_ +#ifndef TARGET_ARCH_SYSARCH_H +#define TARGET_ARCH_SYSARCH_H #include "target_syscall.h" #include "target_arch.h" @@ -75,4 +75,4 @@ static inline void do_freebsd_arch_print_sysarch( } } -#endif /*!_TARGET_ARCH_SYSARCH_H_ */ +#endif /* TARGET_ARCH_SYSARCH_H */ diff --git a/bsd-user/arm/target_arch_thread.h b/bsd-user/arm/target_arch_thread.h index fcafca2408..fd257f313d 100644 --- a/bsd-user/arm/target_arch_thread.h +++ b/bsd-user/arm/target_arch_thread.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_THREAD_H_ -#define _TARGET_ARCH_THREAD_H_ + +#ifndef TARGET_ARCH_THREAD_H +#define TARGET_ARCH_THREAD_H /* Compare to arm/arm/vm_machdep.c cpu_set_upcall_kse() */ static inline void target_thread_set_upcall(CPUARMState *env, abi_ulong entry, @@ -77,4 +78,4 @@ static inline void target_thread_init(struct target_pt_regs *regs, */ } -#endif /* !_TARGET_ARCH_THREAD_H_ */ +#endif /* TARGET_ARCH_THREAD_H */ diff --git a/bsd-user/arm/target_arch_vmparam.h b/bsd-user/arm/target_arch_vmparam.h index 4bbc04ddf5..3fb69aff51 100644 --- a/bsd-user/arm/target_arch_vmparam.h +++ b/bsd-user/arm/target_arch_vmparam.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_VMPARAM_H_ -#define _TARGET_ARCH_VMPARAM_H_ + +#ifndef TARGET_ARCH_VMPARAM_H +#define TARGET_ARCH_VMPARAM_H #include "cpu.h" @@ -45,4 +46,4 @@ static inline void set_second_rval(CPUARMState *state, abi_ulong retval2) state->regs[1] = retval2; } -#endif /* ! _TARGET_ARCH_VMPARAM_H_ */ +#endif /* TARGET_ARCH_VMPARAM_H */ diff --git a/bsd-user/arm/target_syscall.h b/bsd-user/arm/target_syscall.h index a5f2bb4e01..5804a53541 100644 --- a/bsd-user/arm/target_syscall.h +++ b/bsd-user/arm/target_syscall.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_SYSCALL_H_ -#define _TARGET_ARCH_SYSCALL_H_ +#ifndef ARM_TARGET_SYSCALL_H +#define ARM_TARGET_SYSCALL_H struct target_pt_regs { abi_long uregs[17]; @@ -52,4 +52,4 @@ struct target_pt_regs { #define TARGET_HW_MACHINE "arm" #define TARGET_HW_MACHINE_ARCH "armv7" -#endif /* !_TARGET_ARCH_SYSCALL_H_ */ +#endif /* ARM_TARGET_SYSCALL_H */ diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index f0c3f347ec..a6bff3b8c2 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef BSD_FILE_H_ -#define BSD_FILE_H_ +#ifndef BSD_FILE_H +#define BSD_FILE_H #include "qemu/path.h" @@ -27,4 +27,4 @@ extern struct iovec *lock_iovec(int type, abi_ulong target_addr, int count, extern void unlock_iovec(struct iovec *vec, abi_ulong target_addr, int count, int copy); -#endif /* !BSD_FILE_H_ */ +#endif /* BSD_FILE_H */ diff --git a/bsd-user/errno_defs.h b/bsd-user/errno_defs.h index 73cfa24b7f..f3e8ac3488 100644 --- a/bsd-user/errno_defs.h +++ b/bsd-user/errno_defs.h @@ -34,8 +34,8 @@ * @(#)errno.h 8.5 (Berkeley) 1/21/94 */ -#ifndef _ERRNO_DEFS_H_ -#define _ERRNO_DEFS_H_ +#ifndef ERRNO_DEFS_H +#define ERRNO_DEFS_H #define TARGET_EPERM 1 /* Operation not permitted */ #define TARGET_ENOENT 2 /* No such file or directory */ @@ -157,4 +157,4 @@ _Static_assert(TARGET_ERESTART == QEMU_ERESTARTSYS, "TARGET_ERESTART and QEMU_ERESTARTSYS expected to match"); -#endif /* ! _ERRNO_DEFS_H_ */ +#endif /* ERRNO_DEFS_H */ diff --git a/bsd-user/freebsd/host-os.h b/bsd-user/freebsd/host-os.h index dfb8344b7b..40cae72ec9 100644 --- a/bsd-user/freebsd/host-os.h +++ b/bsd-user/freebsd/host-os.h @@ -17,9 +17,9 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _HOST_OS_H_ -#define _HOST_OS_H_ +#ifndef HOST_OS_H +#define HOST_OS_H #define HOST_DEFAULT_BSD_TYPE target_freebsd -#endif /*!_HOST_OS_H_ */ +#endif /* HOST_OS_H */ diff --git a/bsd-user/freebsd/target_os_elf.h b/bsd-user/freebsd/target_os_elf.h index e5ac8e8e50..9df17d56d8 100644 --- a/bsd-user/freebsd/target_os_elf.h +++ b/bsd-user/freebsd/target_os_elf.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_ELF_H_ -#define _TARGET_OS_ELF_H_ + +#ifndef TARGET_OS_ELF_H +#define TARGET_OS_ELF_H #include "target_arch_elf.h" #include "elf.h" @@ -134,4 +135,4 @@ static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, return sp; } -#endif /* _TARGET_OS_ELF_H_ */ +#endif /* TARGET_OS_ELF_H */ diff --git a/bsd-user/freebsd/target_os_siginfo.h b/bsd-user/freebsd/target_os_siginfo.h index d50a3034a8..4573738752 100644 --- a/bsd-user/freebsd/target_os_siginfo.h +++ b/bsd-user/freebsd/target_os_siginfo.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_SIGINFO_H_ -#define _TARGET_OS_SIGINFO_H_ + +#ifndef TARGET_OS_SIGINFO_H +#define TARGET_OS_SIGINFO_H #define TARGET_NSIG 128 #define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) @@ -155,4 +156,4 @@ struct target_sigevent { #define TARGET_FPE_FLTINV (7) /* Invalid floating point operation. */ #define TARGET_FPE_FLTSUB (8) /* Subscript out of range. */ -#endif /* !_TARGET_OS_SIGINFO_H_ */ +#endif /* TARGET_OS_SIGINFO_H */ diff --git a/bsd-user/freebsd/target_os_signal.h b/bsd-user/freebsd/target_os_signal.h index 43700d08f7..5030abb52b 100644 --- a/bsd-user/freebsd/target_os_signal.h +++ b/bsd-user/freebsd/target_os_signal.h @@ -1,5 +1,5 @@ -#ifndef _TARGET_OS_SIGNAL_H_ -#define _TARGET_OS_SIGNAL_H_ +#ifndef TARGET_OS_SIGNAL_H +#define TARGET_OS_SIGNAL_H #include "target_os_siginfo.h" #include "target_arch_signal.h" @@ -78,4 +78,4 @@ abi_long setup_sigframe_arch(CPUArchState *env, abi_ulong frame_addr, #define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ #define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack*/ -#endif /* !_TARGET_OS_SIGNAL_H_ */ +#endif /* TARGET_OS_SIGNAL_H */ diff --git a/bsd-user/freebsd/target_os_stack.h b/bsd-user/freebsd/target_os_stack.h index 1bb1a2bf56..0590133291 100644 --- a/bsd-user/freebsd/target_os_stack.h +++ b/bsd-user/freebsd/target_os_stack.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_STACK_H_ -#define _TARGET_OS_STACK_H_ +#ifndef TARGET_OS_STACK_H +#define TARGET_OS_STACK_H #include <sys/param.h> #include "target_arch_sigtramp.h" @@ -178,4 +178,4 @@ static inline int setup_initial_stack(struct bsd_binprm *bprm, return 0; } -#endif /* !_TARGET_OS_STACK_H_ */ +#endif /* TARGET_OS_STACK_H */ diff --git a/bsd-user/freebsd/target_os_thread.h b/bsd-user/freebsd/target_os_thread.h index 77433acdff..1b32cebd26 100644 --- a/bsd-user/freebsd/target_os_thread.h +++ b/bsd-user/freebsd/target_os_thread.h @@ -17,9 +17,9 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_THREAD_H_ -#define _TARGET_OS_THREAD_H_ +#ifndef TARGET_OS_THREAD_H +#define TARGET_OS_THREAD_H #include "target_arch_thread.h" -#endif /* !_TARGET_OS_THREAD_H_ */ +#endif /* TARGET_OS_THREAD_H */ diff --git a/bsd-user/freebsd/target_os_user.h b/bsd-user/freebsd/target_os_user.h index 19892c5071..f036a32343 100644 --- a/bsd-user/freebsd/target_os_user.h +++ b/bsd-user/freebsd/target_os_user.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_USER_H_ -#define _TARGET_OS_USER_H_ +#ifndef TARGET_OS_USER_H +#define TARGET_OS_USER_H /* * from sys/priority.h @@ -326,4 +326,4 @@ struct target_kinfo_vmentry { char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ }; -#endif /* ! _TARGET_OS_USER_H_ */ +#endif /* TARGET_OS_USER_H */ diff --git a/bsd-user/freebsd/target_os_vmparam.h b/bsd-user/freebsd/target_os_vmparam.h index 990300c619..8457dd3913 100644 --- a/bsd-user/freebsd/target_os_vmparam.h +++ b/bsd-user/freebsd/target_os_vmparam.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_VMPARAM_H_ -#define _TARGET_OS_VMPARAM_H_ + +#ifndef TARGET_OS_VMPARAM_H +#define TARGET_OS_VMPARAM_H #include "target_arch_vmparam.h" @@ -35,4 +36,4 @@ extern abi_ulong target_stksiz; #define TARGET_PS_STRINGS ((target_stkbas + target_stksiz) - \ sizeof(struct target_ps_strings)) -#endif /* !TARGET_OS_VMPARAM_H_ */ +#endif /* TARGET_OS_VMPARAM_H */ diff --git a/bsd-user/i386/target.h b/bsd-user/i386/target.h index 9b9df047a3..ddd3b8ec08 100644 --- a/bsd-user/i386/target.h +++ b/bsd-user/i386/target.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef TARGET_ARCH_H -#define TARGET_ARCH_H +#ifndef TARGET_H +#define TARGET_H /* * i386 doesn't 'lump' the registers for 64-bit args. @@ -17,5 +17,4 @@ static inline bool regpairs_aligned(void *cpu_env) return false; } -#endif /* ! TARGET_ARCH_H */ - +#endif /* TARGET_H */ diff --git a/bsd-user/i386/target_arch.h b/bsd-user/i386/target_arch.h index 73e9a028fe..9595e60f09 100644 --- a/bsd-user/i386/target_arch.h +++ b/bsd-user/i386/target_arch.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_H_ -#define _TARGET_ARCH_H_ +#ifndef TARGET_ARCH_H +#define TARGET_ARCH_H /* target_arch_cpu.c */ void bsd_i386_write_dt(void *ptr, unsigned long addr, unsigned long limit, @@ -28,4 +28,4 @@ void bsd_i386_set_idt_base(uint64_t base); #define target_cpu_set_tls(env, newtls) -#endif /* ! _TARGET_ARCH_H_ */ +#endif /* TARGET_ARCH_H */ diff --git a/bsd-user/i386/target_arch_cpu.h b/bsd-user/i386/target_arch_cpu.h index 9da22202d4..d792dc720f 100644 --- a/bsd-user/i386/target_arch_cpu.h +++ b/bsd-user/i386/target_arch_cpu.h @@ -16,8 +16,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_CPU_H_ -#define _TARGET_ARCH_CPU_H_ +#ifndef TARGET_ARCH_CPU_H +#define TARGET_ARCH_CPU_H #include "target_arch.h" #include "signal-common.h" @@ -195,4 +195,4 @@ static inline void target_cpu_reset(CPUArchState *env) cpu_reset(env_cpu(env)); } -#endif /* ! _TARGET_ARCH_CPU_H_ */ +#endif /* TARGET_ARCH_CPU_H */ diff --git a/bsd-user/i386/target_arch_elf.h b/bsd-user/i386/target_arch_elf.h index eb760e07fa..cbcd1f08e2 100644 --- a/bsd-user/i386/target_arch_elf.h +++ b/bsd-user/i386/target_arch_elf.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_ELF_H_ -#define _TARGET_ARCH_ELF_H_ + +#ifndef TARGET_ARCH_ELF_H +#define TARGET_ARCH_ELF_H #define ELF_START_MMAP 0x80000000 #define ELF_ET_DYN_LOAD_ADDR 0x01001000 @@ -32,4 +33,4 @@ #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 -#endif /* _TARGET_ARCH_ELF_H_ */ +#endif /* TARGET_ARCH_ELF_H */ diff --git a/bsd-user/i386/target_arch_reg.h b/bsd-user/i386/target_arch_reg.h index 1fce1daf01..8123109697 100644 --- a/bsd-user/i386/target_arch_reg.h +++ b/bsd-user/i386/target_arch_reg.h @@ -18,8 +18,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_REG_H_ -#define _TARGET_ARCH_REG_H_ +#ifndef TARGET_ARCH_REG_H +#define TARGET_ARCH_REG_H /* See sys/i386/include/reg.h */ typedef struct target_reg { @@ -79,4 +79,4 @@ static inline void target_copy_regs(target_reg_t *regs, const CPUX86State *env) regs->r_gs = env->segs[R_GS].selector & 0xffff; } -#endif /* !_TARGET_ARCH_REG_H_ */ +#endif /* TARGET_ARCH_REG_H */ diff --git a/bsd-user/i386/target_arch_sigtramp.h b/bsd-user/i386/target_arch_sigtramp.h index cb4e89b0b0..ef94cc864f 100644 --- a/bsd-user/i386/target_arch_sigtramp.h +++ b/bsd-user/i386/target_arch_sigtramp.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_SIGTRAMP_H_ -#define _TARGET_ARCH_SIGTRAMP_H_ +#ifndef TARGET_ARCH_SIGTRAMP_H +#define TARGET_ARCH_SIGTRAMP_H static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, unsigned sys_sigreturn) @@ -26,4 +26,4 @@ static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, return 0; } -#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ +#endif /* TARGET_ARCH_SIGTRAMP_H */ diff --git a/bsd-user/i386/target_arch_sysarch.h b/bsd-user/i386/target_arch_sysarch.h index e9ab98ec32..db8fee6380 100644 --- a/bsd-user/i386/target_arch_sysarch.h +++ b/bsd-user/i386/target_arch_sysarch.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef BSD_USER_ARCH_SYSARCH_H_ -#define BSD_USER_ARCH_SYSARCH_H_ +#ifndef TARGET_ARCH_SYSARCH_H +#define TARGET_ARCH_SYSARCH_H #include "target_syscall.h" @@ -74,4 +74,4 @@ static inline void do_freebsd_arch_print_sysarch( TARGET_ABI_FMT_lx ")", name->name, (int)arg1, arg2, arg3, arg4); } -#endif /* !BSD_USER_ARCH_SYSARCH_H_ */ +#endif /* TARGET_ARCH_SYSARCH_H */ diff --git a/bsd-user/i386/target_arch_thread.h b/bsd-user/i386/target_arch_thread.h index e65e476f75..cee2148d94 100644 --- a/bsd-user/i386/target_arch_thread.h +++ b/bsd-user/i386/target_arch_thread.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_THREAD_H_ -#define _TARGET_ARCH_THREAD_H_ + +#ifndef TARGET_ARCH_THREAD_H +#define TARGET_ARCH_THREAD_H /* Compare to vm_machdep.c cpu_set_upcall_kse() */ static inline void target_thread_set_upcall(CPUX86State *regs, abi_ulong entry, @@ -44,4 +45,4 @@ static inline void target_thread_init(struct target_pt_regs *regs, regs->edx = 0; } -#endif /* !_TARGET_ARCH_THREAD_H_ */ +#endif /* TARGET_ARCH_THREAD_H */ diff --git a/bsd-user/i386/target_arch_vmparam.h b/bsd-user/i386/target_arch_vmparam.h index bb7718265b..79db420e59 100644 --- a/bsd-user/i386/target_arch_vmparam.h +++ b/bsd-user/i386/target_arch_vmparam.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_VMPARAM_H_ -#define _TARGET_ARCH_VMPARAM_H_ + +#ifndef TARGET_ARCH_VMPARAM_H +#define TARGET_ARCH_VMPARAM_H #include "cpu.h" @@ -43,4 +44,4 @@ static inline void set_second_rval(CPUX86State *state, abi_ulong retval2) state->regs[R_EDX] = retval2; } -#endif /* !_TARGET_ARCH_VMPARAM_H_ */ +#endif /* TARGET_ARCH_VMPARAM_H */ diff --git a/bsd-user/netbsd/host-os.h b/bsd-user/netbsd/host-os.h index c0be51a7ef..7c14b1ea78 100644 --- a/bsd-user/netbsd/host-os.h +++ b/bsd-user/netbsd/host-os.h @@ -17,9 +17,9 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _HOST_OS_H_ -#define _HOST_OS_H_ +#ifndef HOST_OS_H +#define HOST_OS_H #define HOST_DEFAULT_BSD_TYPE target_netbsd -#endif /*!_HOST_OS_H_ */ +#endif /* HOST_OS_H */ diff --git a/bsd-user/netbsd/target_os_elf.h b/bsd-user/netbsd/target_os_elf.h index 21b475f458..2f3cb20871 100644 --- a/bsd-user/netbsd/target_os_elf.h +++ b/bsd-user/netbsd/target_os_elf.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_ELF_H_ -#define _TARGET_OS_ELF_H_ + +#ifndef TARGET_OS_ELF_H +#define TARGET_OS_ELF_H #include "target_arch_elf.h" #include "elf.h" @@ -143,4 +144,4 @@ static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, return sp; } -#endif /* _TARGET_OS_ELF_H_ */ +#endif /* TARGET_OS_ELF_H */ diff --git a/bsd-user/netbsd/target_os_siginfo.h b/bsd-user/netbsd/target_os_siginfo.h index 667c19cc7c..eb57e0a309 100644 --- a/bsd-user/netbsd/target_os_siginfo.h +++ b/bsd-user/netbsd/target_os_siginfo.h @@ -1,5 +1,5 @@ -#ifndef _TARGET_OS_SIGINFO_H_ -#define _TARGET_OS_SIGINFO_H_ +#ifndef TARGET_OS_SIGINFO_H +#define TARGET_OS_SIGINFO_H #define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ #define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) @@ -79,4 +79,4 @@ typedef union target_siginfo { #define TARGET_TRAP_TRACE 2 -#endif /* ! _TARGET_OS_SIGINFO_H_ */ +#endif /* TARGET_OS_SIGINFO_H */ diff --git a/bsd-user/netbsd/target_os_signal.h b/bsd-user/netbsd/target_os_signal.h index a373922f7e..4ee4f768e0 100644 --- a/bsd-user/netbsd/target_os_signal.h +++ b/bsd-user/netbsd/target_os_signal.h @@ -1,5 +1,5 @@ -#ifndef _TARGET_OS_SIGNAL_H_ -#define _TARGET_OS_SIGNAL_H_ +#ifndef TARGET_OS_SIGNAL_H +#define TARGET_OS_SIGNAL_H #include "target_os_siginfo.h" #include "target_arch_signal.h" @@ -66,4 +66,4 @@ #define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ #define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ -#endif /* !_TARGET_OS_SIGNAL_H_ */ +#endif /* TARGET_OS_SIGNAL_H */ diff --git a/bsd-user/netbsd/target_os_stack.h b/bsd-user/netbsd/target_os_stack.h index 503279c1a9..8349e9149b 100644 --- a/bsd-user/netbsd/target_os_stack.h +++ b/bsd-user/netbsd/target_os_stack.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_STACK_H_ -#define _TARGET_OS_STACK_H_ +#ifndef TARGET_OS_STACK_H +#define TARGET_OS_STACK_H #include "target_arch_sigtramp.h" @@ -53,4 +53,4 @@ static inline int setup_initial_stack(struct bsd_binprm *bprm, abi_ulong *p, return 0; } -#endif /* !_TARGET_OS_STACK_H_ */ +#endif /* TARGET_OS_STACK_H */ diff --git a/bsd-user/netbsd/target_os_thread.h b/bsd-user/netbsd/target_os_thread.h index 904dd1bf78..8ccfa16e4b 100644 --- a/bsd-user/netbsd/target_os_thread.h +++ b/bsd-user/netbsd/target_os_thread.h @@ -17,9 +17,9 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_THREAD_H_ -#define _TARGET_OS_THREAD_H_ +#ifndef TARGET_OS_THREAD_H +#define TARGET_OS_THREAD_H #include "target_arch_thread.h" -#endif /* !_TARGET_OS_THREAD_H_ */ +#endif /* TARGET_OS_THREAD_H */ diff --git a/bsd-user/openbsd/host-os.h b/bsd-user/openbsd/host-os.h index eb8fdf1567..b9222335d4 100644 --- a/bsd-user/openbsd/host-os.h +++ b/bsd-user/openbsd/host-os.h @@ -17,9 +17,9 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _HOST_OS_H_ -#define _HOST_OS_H_ +#ifndef HOST_OS_H +#define HOST_OS_H #define HOST_DEFAULT_BSD_TYPE target_openbsd -#endif /*!_HOST_OS_H_ */ +#endif /* HOST_OS_H */ diff --git a/bsd-user/openbsd/target_os_elf.h b/bsd-user/openbsd/target_os_elf.h index a5cfcd3aff..6dca9c5a85 100644 --- a/bsd-user/openbsd/target_os_elf.h +++ b/bsd-user/openbsd/target_os_elf.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_ELF_H_ -#define _TARGET_OS_ELF_H_ + +#ifndef TARGET_OS_ELF_H +#define TARGET_OS_ELF_H #include "target_arch_elf.h" #include "elf.h" @@ -143,4 +144,4 @@ static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, return sp; } -#endif /* _TARGET_OS_ELF_H_ */ +#endif /* TARGET_OS_ELF_H */ diff --git a/bsd-user/openbsd/target_os_siginfo.h b/bsd-user/openbsd/target_os_siginfo.h index baf646a5ab..732009a820 100644 --- a/bsd-user/openbsd/target_os_siginfo.h +++ b/bsd-user/openbsd/target_os_siginfo.h @@ -1,5 +1,5 @@ -#ifndef _TARGET_OS_SIGINFO_H_ -#define _TARGET_OS_SIGINFO_H_ +#ifndef TARGET_OS_SIGINFO_H +#define TARGET_OS_SIGINFO_H #define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ #define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) @@ -79,4 +79,4 @@ typedef union target_siginfo { #define TARGET_TRAP_TRACE 2 -#endif /* ! _TARGET_OS_SIGINFO_H_ */ +#endif /* TARGET_OS_SIGINFO_H */ diff --git a/bsd-user/openbsd/target_os_signal.h b/bsd-user/openbsd/target_os_signal.h index a373922f7e..4ee4f768e0 100644 --- a/bsd-user/openbsd/target_os_signal.h +++ b/bsd-user/openbsd/target_os_signal.h @@ -1,5 +1,5 @@ -#ifndef _TARGET_OS_SIGNAL_H_ -#define _TARGET_OS_SIGNAL_H_ +#ifndef TARGET_OS_SIGNAL_H +#define TARGET_OS_SIGNAL_H #include "target_os_siginfo.h" #include "target_arch_signal.h" @@ -66,4 +66,4 @@ #define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ #define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ -#endif /* !_TARGET_OS_SIGNAL_H_ */ +#endif /* TARGET_OS_SIGNAL_H */ diff --git a/bsd-user/openbsd/target_os_stack.h b/bsd-user/openbsd/target_os_stack.h index 4b37955d3b..264a658608 100644 --- a/bsd-user/openbsd/target_os_stack.h +++ b/bsd-user/openbsd/target_os_stack.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_STACK_H_ -#define _TARGET_OS_STACK_H_ +#ifndef TARGET_OS_STACK_H +#define TARGET_OS_STACK_H #include "target_arch_sigtramp.h" @@ -53,4 +53,4 @@ static inline int setup_initial_stack(struct bsd_binprm *bprm, abi_ulong *p, return 0; } -#endif /* !_TARGET_OS_STACK_H_ */ +#endif /* TARGET_OS_STACK_H */ diff --git a/bsd-user/openbsd/target_os_thread.h b/bsd-user/openbsd/target_os_thread.h index 01ed0d9fc8..c3adc6712f 100644 --- a/bsd-user/openbsd/target_os_thread.h +++ b/bsd-user/openbsd/target_os_thread.h @@ -17,9 +17,9 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_OS_THREAD_H_ -#define _TARGET_OS_THREAD_H_ +#ifndef TARGET_OS_THREAD_H +#define TARGET_OS_THREAD_H #include "target_arch_thread.h" -#endif /* !_TARGET_OS_THREAD_H_ */ +#endif /* TARGET_OS_THREAD_H */ diff --git a/bsd-user/syscall_defs.h b/bsd-user/syscall_defs.h index c3bf14f38f..f5797b28e3 100644 --- a/bsd-user/syscall_defs.h +++ b/bsd-user/syscall_defs.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _SYSCALL_DEFS_H_ -#define _SYSCALL_DEFS_H_ +#ifndef SYSCALL_DEFS_H +#define SYSCALL_DEFS_H #include <sys/syscall.h> #include <sys/resource.h> @@ -226,4 +226,4 @@ type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ return safe_syscall(SYS_##name, arg1, arg2, arg3, arg4, arg5, arg6); \ } -#endif /* ! _SYSCALL_DEFS_H_ */ +#endif /* SYSCALL_DEFS_H */ diff --git a/bsd-user/x86_64/target.h b/bsd-user/x86_64/target.h index 8956631db1..0cf0e2a14a 100644 --- a/bsd-user/x86_64/target.h +++ b/bsd-user/x86_64/target.h @@ -17,5 +17,5 @@ static inline bool regpairs_aligned(void *cpu_env) return false; } -#endif /* ! TARGET_H */ +#endif /* TARGET_H */ diff --git a/bsd-user/x86_64/target_arch.h b/bsd-user/x86_64/target_arch.h index e558e1b956..09bd974889 100644 --- a/bsd-user/x86_64/target_arch.h +++ b/bsd-user/x86_64/target_arch.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_H_ -#define _TARGET_ARCH_H_ +#ifndef TARGET_ARCH_H +#define TARGET_ARCH_H /* target_arch_cpu.c */ void bsd_x86_64_write_dt(void *ptr, unsigned long addr, unsigned long limit, @@ -28,4 +28,4 @@ void bsd_x86_64_set_idt_base(uint64_t base); #define target_cpu_set_tls(env, newtls) -#endif /* !_TARGET_ARCH_H_ */ +#endif /* TARGET_ARCH_H */ diff --git a/bsd-user/x86_64/target_arch_cpu.h b/bsd-user/x86_64/target_arch_cpu.h index 5be2f02416..4094d61da1 100644 --- a/bsd-user/x86_64/target_arch_cpu.h +++ b/bsd-user/x86_64/target_arch_cpu.h @@ -16,8 +16,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_CPU_H_ -#define _TARGET_ARCH_CPU_H_ +#ifndef TARGET_ARCH_CPU_H +#define TARGET_ARCH_CPU_H #include "target_arch.h" #include "signal-common.h" @@ -174,4 +174,4 @@ static inline void target_cpu_reset(CPUArchState *env) cpu_reset(env_cpu(env)); } -#endif /* ! _TARGET_ARCH_CPU_H_ */ +#endif /* TARGET_ARCH_CPU_H */ diff --git a/bsd-user/x86_64/target_arch_elf.h b/bsd-user/x86_64/target_arch_elf.h index c2f8553962..b244711888 100644 --- a/bsd-user/x86_64/target_arch_elf.h +++ b/bsd-user/x86_64/target_arch_elf.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_ELF_H_ -#define _TARGET_ARCH_ELF_H_ + +#ifndef TARGET_ARCH_ELF_H +#define TARGET_ARCH_ELF_H #define ELF_START_MMAP 0x2aaaaab000ULL #define ELF_ET_DYN_LOAD_ADDR 0x01021000 @@ -32,4 +33,4 @@ #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 -#endif /* _TARGET_ARCH_ELF_H_ */ +#endif /* TARGET_ARCH_ELF_H */ diff --git a/bsd-user/x86_64/target_arch_reg.h b/bsd-user/x86_64/target_arch_reg.h index 00e9624517..7a766de918 100644 --- a/bsd-user/x86_64/target_arch_reg.h +++ b/bsd-user/x86_64/target_arch_reg.h @@ -18,8 +18,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_REG_H_ -#define _TARGET_ARCH_REG_H_ +#ifndef TARGET_ARCH_REG_H +#define TARGET_ARCH_REG_H /* See sys/amd64/include/reg.h */ typedef struct target_reg { @@ -89,4 +89,4 @@ static inline void target_copy_regs(target_reg_t *regs, const CPUX86State *env) regs->r_ss = env->segs[R_SS].selector & 0xffff; } -#endif /* !_TARGET_ARCH_REG_H_ */ +#endif /* TARGET_ARCH_REG_H */ diff --git a/bsd-user/x86_64/target_arch_signal.h b/bsd-user/x86_64/target_arch_signal.h index b4a0ebf2bd..ca24bf1e7f 100644 --- a/bsd-user/x86_64/target_arch_signal.h +++ b/bsd-user/x86_64/target_arch_signal.h @@ -15,8 +15,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_SIGNAL_H_ -#define _TARGET_ARCH_SIGNAL_H_ + +#ifndef TARGET_ARCH_SIGNAL_H +#define TARGET_ARCH_SIGNAL_H #include "cpu.h" @@ -96,4 +97,4 @@ struct target_sigframe { uint32_t __spare__[2]; }; -#endif /* !TARGET_ARCH_SIGNAL_H_ */ +#endif /* TARGET_ARCH_SIGNAL_H */ diff --git a/bsd-user/x86_64/target_arch_sigtramp.h b/bsd-user/x86_64/target_arch_sigtramp.h index 29d4a8b55f..01da614098 100644 --- a/bsd-user/x86_64/target_arch_sigtramp.h +++ b/bsd-user/x86_64/target_arch_sigtramp.h @@ -17,8 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_SIGTRAMP_H_ -#define _TARGET_ARCH_SIGTRAMP_H_ +#ifndef TARGET_ARCH_SIGTRAMP_H +#define TARGET_ARCH_SIGTRAMP_H static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, unsigned sys_sigreturn) @@ -26,4 +26,4 @@ static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, return 0; } -#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ +#endif /* TARGET_ARCH_SIGTRAMP_H */ diff --git a/bsd-user/x86_64/target_arch_sysarch.h b/bsd-user/x86_64/target_arch_sysarch.h index 5c36fc0752..152cb8bcb8 100644 --- a/bsd-user/x86_64/target_arch_sysarch.h +++ b/bsd-user/x86_64/target_arch_sysarch.h @@ -16,8 +16,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef BSD_USER_ARCH_SYSARCH_H_ -#define BSD_USER_ARCH_SYSARCH_H_ +#ifndef TARGET_ARCH_SYSARCH_H +#define TARGET_ARCH_SYSARCH_H #include "target_syscall.h" @@ -73,4 +73,4 @@ static inline void do_freebsd_arch_print_sysarch( TARGET_ABI_FMT_lx ")", name->name, (int)arg1, arg2, arg3, arg4); } -#endif /*! BSD_USER_ARCH_SYSARCH_H_ */ +#endif /* TARGET_ARCH_SYSARCH_H */ diff --git a/bsd-user/x86_64/target_arch_thread.h b/bsd-user/x86_64/target_arch_thread.h index b745d7ffeb..52c28906d6 100644 --- a/bsd-user/x86_64/target_arch_thread.h +++ b/bsd-user/x86_64/target_arch_thread.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_THREAD_H_ -#define _TARGET_ARCH_THREAD_H_ + +#ifndef TARGET_ARCH_THREAD_H +#define TARGET_ARCH_THREAD_H /* Compare to vm_machdep.c cpu_set_upcall_kse() */ static inline void target_thread_set_upcall(CPUX86State *regs, abi_ulong entry, @@ -35,4 +36,4 @@ static inline void target_thread_init(struct target_pt_regs *regs, regs->rdi = infop->start_stack; } -#endif /* !_TARGET_ARCH_THREAD_H_ */ +#endif /* TARGET_ARCH_THREAD_H */ diff --git a/bsd-user/x86_64/target_arch_vmparam.h b/bsd-user/x86_64/target_arch_vmparam.h index 81a915f2e5..6797623a6b 100644 --- a/bsd-user/x86_64/target_arch_vmparam.h +++ b/bsd-user/x86_64/target_arch_vmparam.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _TARGET_ARCH_VMPARAM_H_ -#define _TARGET_ARCH_VMPARAM_H_ + +#ifndef TARGET_ARCH_VMPARAM_H +#define TARGET_ARCH_VMPARAM_H #include "cpu.h" @@ -43,4 +44,4 @@ static inline void set_second_rval(CPUX86State *state, abi_ulong retval2) state->regs[R_EDX] = retval2; } -#endif /* !_TARGET_ARCH_VMPARAM_H_ */ +#endif /* TARGET_ARCH_VMPARAM_H */ diff --git a/chardev/char-io.c b/chardev/char-io.c index 8ced184160..4451128cba 100644 --- a/chardev/char-io.c +++ b/chardev/char-io.c @@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, ret = qio_channel_writev_full( ioc, &iov, 1, - fds, nfds, NULL); + fds, nfds, 0, NULL); if (ret == QIO_CHANNEL_ERR_BLOCK) { if (offset) { return offset; diff --git a/chardev/chardev-internal.h b/chardev/chardev-internal.h index aba0240759..4e03af3147 100644 --- a/chardev/chardev-internal.h +++ b/chardev/chardev-internal.h @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + #ifndef CHARDEV_INTERNAL_H #define CHARDEV_INTERNAL_H @@ -64,4 +65,4 @@ void mux_chr_send_all_event(Chardev *chr, QEMUChrEvent event); Object *get_chardevs_root(void); -#endif /* CHAR_MUX_H */ +#endif /* CHARDEV_INTERNAL_H */ @@ -1043,10 +1043,6 @@ Advanced options (experts only): --enable-tsan enable thread sanitizer --disable-werror disable compilation abort on warning --disable-stack-protector disable compiler-provided stack protection - --audio-drv-list=LIST set audio drivers to try if -audiodev is not used - --block-drv-whitelist=L Same as --block-drv-rw-whitelist=L - --with-trace-file=NAME Full PATH,NAME of file to store traces - Default:trace-<pid> --cpu=CPU Build for host CPU [$cpu] --with-coroutine=BACKEND coroutine backend. Supported options: ucontext, sigaltstack, windows @@ -1992,7 +1988,6 @@ fi if test "$static" = "yes" ; then echo "CONFIG_STATIC=y" >> $config_host_mak fi -qemu_version=$(head $source_path/VERSION) echo "SRC_PATH=$source_path" >> $config_host_mak echo "TARGET_DIRS=$target_list" >> $config_host_mak if test "$modules" = "yes"; then @@ -2038,7 +2033,6 @@ echo "CCAS=$ccas" >> $config_host_mak echo "CPP=$cpp" >> $config_host_mak echo "OBJCOPY=$objcopy" >> $config_host_mak echo "LD=$ld" >> $config_host_mak -echo "CFLAGS_NOPIE=$CFLAGS_NOPIE" >> $config_host_mak echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak echo "QEMU_CXXFLAGS=$QEMU_CXXFLAGS" >> $config_host_mak echo "QEMU_OBJCFLAGS=$QEMU_OBJCFLAGS" >> $config_host_mak diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c index b2c0f98253..9ef61cf5a7 100644 --- a/contrib/vhost-user-scsi/vhost-user-scsi.c +++ b/contrib/vhost-user-scsi/vhost-user-scsi.c @@ -433,13 +433,16 @@ out: if (vdev_scsi) { g_main_loop_unref(vdev_scsi->loop); g_free(vdev_scsi); - unlink(opt_socket_path); } if (csock >= 0) { close(csock); } if (lsock >= 0) { close(lsock); + + if (opt_socket_path) { + unlink(opt_socket_path); + } } g_free(opt_socket_path); g_free(iscsi_uri); diff --git a/crypto/ivgen-plain.h b/crypto/ivgen-plain.h index 43db898809..ebae6acd04 100644 --- a/crypto/ivgen-plain.h +++ b/crypto/ivgen-plain.h @@ -17,11 +17,11 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef QCRYPTO_IVGEN_PLAIN_H__ -#define QCRYPTO_IVGEN_PLAIN_H__ +#ifndef QCRYPTO_IVGEN_PLAIN_H +#define QCRYPTO_IVGEN_PLAIN_H #include "ivgenpriv.h" extern struct QCryptoIVGenDriver qcrypto_ivgen_plain; -#endif /* QCRYPTO_IVGEN_PLAIN_H__ */ +#endif /* QCRYPTO_IVGEN_PLAIN_H */ diff --git a/crypto/secret_common.c b/crypto/secret_common.c index 714a15d5e5..3441c44ca8 100644 --- a/crypto/secret_common.c +++ b/crypto/secret_common.c @@ -138,36 +138,44 @@ static void qcrypto_secret_decode(const uint8_t *input, static void -qcrypto_secret_prop_set_loaded(Object *obj, - bool value, - Error **errp) +qcrypto_secret_complete(UserCreatable *uc, Error **errp) { - QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(uc); QCryptoSecretCommonClass *sec_class - = QCRYPTO_SECRET_COMMON_GET_CLASS(obj); - - if (value) { - Error *local_err = NULL; - uint8_t *input = NULL; - size_t inputlen = 0; - uint8_t *output = NULL; - size_t outputlen = 0; - - if (sec_class->load_data) { - sec_class->load_data(secret, &input, &inputlen, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } else { - error_setg(errp, "%s provides no 'load_data' method'", - object_get_typename(obj)); + = QCRYPTO_SECRET_COMMON_GET_CLASS(uc); + + Error *local_err = NULL; + uint8_t *input = NULL; + size_t inputlen = 0; + uint8_t *output = NULL; + size_t outputlen = 0; + + if (sec_class->load_data) { + sec_class->load_data(secret, &input, &inputlen, &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } + } else { + error_setg(errp, "%s provides no 'load_data' method'", + object_get_typename(OBJECT(uc))); + return; + } - if (secret->keyid) { - qcrypto_secret_decrypt(secret, input, inputlen, - &output, &outputlen, &local_err); + if (secret->keyid) { + qcrypto_secret_decrypt(secret, input, inputlen, + &output, &outputlen, &local_err); + g_free(input); + if (local_err) { + error_propagate(errp, local_err); + return; + } + input = output; + inputlen = outputlen; + } else { + if (secret->format == QCRYPTO_SECRET_FORMAT_BASE64) { + qcrypto_secret_decode(input, inputlen, + &output, &outputlen, &local_err); g_free(input); if (local_err) { error_propagate(errp, local_err); @@ -175,26 +183,11 @@ qcrypto_secret_prop_set_loaded(Object *obj, } input = output; inputlen = outputlen; - } else { - if (secret->format == QCRYPTO_SECRET_FORMAT_BASE64) { - qcrypto_secret_decode(input, inputlen, - &output, &outputlen, &local_err); - g_free(input); - if (local_err) { - error_propagate(errp, local_err); - return; - } - input = output; - inputlen = outputlen; - } } - - secret->rawdata = input; - secret->rawlen = inputlen; - } else if (secret->rawdata) { - error_setg(errp, "Cannot unload secret"); - return; } + + secret->rawdata = input; + secret->rawlen = inputlen; } @@ -269,13 +262,6 @@ qcrypto_secret_prop_get_keyid(Object *obj, static void -qcrypto_secret_complete(UserCreatable *uc, Error **errp) -{ - object_property_set_bool(OBJECT(uc), "loaded", true, errp); -} - - -static void qcrypto_secret_finalize(Object *obj) { QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); @@ -294,7 +280,7 @@ qcrypto_secret_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "loaded", qcrypto_secret_prop_get_loaded, - qcrypto_secret_prop_set_loaded); + NULL); object_class_property_add_enum(oc, "format", "QCryptoSecretFormat", &QCryptoSecretFormat_lookup, diff --git a/crypto/tlscredsanon.c b/crypto/tlscredsanon.c index 6fb83639ec..c0d23a0ef3 100644 --- a/crypto/tlscredsanon.c +++ b/crypto/tlscredsanon.c @@ -119,16 +119,11 @@ qcrypto_tls_creds_anon_unload(QCryptoTLSCredsAnon *creds G_GNUC_UNUSED) static void -qcrypto_tls_creds_anon_prop_set_loaded(Object *obj, - bool value, - Error **errp) +qcrypto_tls_creds_anon_complete(UserCreatable *uc, Error **errp) { - QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj); + QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(uc); - qcrypto_tls_creds_anon_unload(creds); - if (value) { - qcrypto_tls_creds_anon_load(creds, errp); - } + qcrypto_tls_creds_anon_load(creds, errp); } @@ -164,13 +159,6 @@ qcrypto_tls_creds_anon_prop_get_loaded(Object *obj G_GNUC_UNUSED, static void -qcrypto_tls_creds_anon_complete(UserCreatable *uc, Error **errp) -{ - object_property_set_bool(OBJECT(uc), "loaded", true, errp); -} - - -static void qcrypto_tls_creds_anon_finalize(Object *obj) { QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj); @@ -188,7 +176,7 @@ qcrypto_tls_creds_anon_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "loaded", qcrypto_tls_creds_anon_prop_get_loaded, - qcrypto_tls_creds_anon_prop_set_loaded); + NULL); } diff --git a/crypto/tlscredspsk.c b/crypto/tlscredspsk.c index 752f2d92be..a4f9891274 100644 --- a/crypto/tlscredspsk.c +++ b/crypto/tlscredspsk.c @@ -188,16 +188,11 @@ qcrypto_tls_creds_psk_unload(QCryptoTLSCredsPSK *creds G_GNUC_UNUSED) static void -qcrypto_tls_creds_psk_prop_set_loaded(Object *obj, - bool value, - Error **errp) +qcrypto_tls_creds_psk_complete(UserCreatable *uc, Error **errp) { - QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); + QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(uc); - qcrypto_tls_creds_psk_unload(creds); - if (value) { - qcrypto_tls_creds_psk_load(creds, errp); - } + qcrypto_tls_creds_psk_load(creds, errp); } @@ -233,13 +228,6 @@ qcrypto_tls_creds_psk_prop_get_loaded(Object *obj G_GNUC_UNUSED, static void -qcrypto_tls_creds_psk_complete(UserCreatable *uc, Error **errp) -{ - object_property_set_bool(OBJECT(uc), "loaded", true, errp); -} - - -static void qcrypto_tls_creds_psk_finalize(Object *obj) { QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); @@ -276,7 +264,7 @@ qcrypto_tls_creds_psk_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "loaded", qcrypto_tls_creds_psk_prop_get_loaded, - qcrypto_tls_creds_psk_prop_set_loaded); + NULL); object_class_property_add_str(oc, "username", qcrypto_tls_creds_psk_prop_get_username, qcrypto_tls_creds_psk_prop_set_username); diff --git a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c index 32948a6bdc..d14313925d 100644 --- a/crypto/tlscredsx509.c +++ b/crypto/tlscredsx509.c @@ -687,16 +687,11 @@ qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds G_GNUC_UNUSED) static void -qcrypto_tls_creds_x509_prop_set_loaded(Object *obj, - bool value, - Error **errp) +qcrypto_tls_creds_x509_complete(UserCreatable *uc, Error **errp) { - QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(uc); - qcrypto_tls_creds_x509_unload(creds); - if (value) { - qcrypto_tls_creds_x509_load(creds, errp); - } + qcrypto_tls_creds_x509_load(creds, errp); } @@ -815,13 +810,6 @@ qcrypto_tls_creds_x509_reload(QCryptoTLSCreds *creds, Error **errp) static void -qcrypto_tls_creds_x509_complete(UserCreatable *uc, Error **errp) -{ - object_property_set_bool(OBJECT(uc), "loaded", true, errp); -} - - -static void qcrypto_tls_creds_x509_init(Object *obj) { QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); @@ -852,7 +840,7 @@ qcrypto_tls_creds_x509_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "loaded", qcrypto_tls_creds_x509_prop_get_loaded, - qcrypto_tls_creds_x509_prop_set_loaded); + NULL); object_class_property_add_bool(oc, "sanity-check", qcrypto_tls_creds_x509_prop_get_sanity, qcrypto_tls_creds_x509_prop_set_sanity); diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 896e5a97ab..a92ae0f162 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -39,15 +39,6 @@ should specify an ``audiodev=`` property. Additionally, when using vnc, you should specify an ``audiodev=`` property if you plan to transmit audio through the VNC protocol. -Creating sound card devices using ``-soundhw`` (since 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Sound card devices should be created using ``-device`` instead. The -names are the same for most devices. The exceptions are ``hda`` which -needs two devices (``-device intel-hda -device hda-duplex``) and -``pcspk`` which can be activated using ``-machine -pcspk-audiodev=<name>``. - ``-chardev`` backend aliases ``tty`` and ``parport`` (since 6.0) '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' @@ -90,25 +81,6 @@ the process listing. This is replaced by the new ``password-secret`` option which lets the password be securely provided on the command line using a ``secret`` object instance. -``opened`` property of ``rng-*`` objects (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''' - -The only effect of specifying ``opened=on`` in the command line or QMP -``object-add`` is that the device is opened immediately, possibly before all -other options have been processed. This will either have no effect (if -``opened`` was the last option) or cause errors. The property is therefore -useless and should not be specified. - -``loaded`` property of ``secret`` and ``secret_keyring`` objects (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The only effect of specifying ``loaded=on`` in the command line or QMP -``object-add`` is that the secret is loaded immediately, possibly before all -other options have been processed. This will either have no effect (if -``loaded`` was the last option) or cause options to be effectively ignored as -if they were not given. The property is therefore useless and should not be -specified. - ``-display sdl,window_close=...`` (since 6.1) ''''''''''''''''''''''''''''''''''''''''''''' @@ -270,12 +242,6 @@ from Linux upstream kernel, declare it deprecated. System emulator CPUS -------------------- -``Icelake-Client`` CPU Model (since 5.2) -'''''''''''''''''''''''''''''''''''''''' - -``Icelake-Client`` CPU Models are deprecated. Use ``Icelake-Server`` CPU -Models instead. - MIPS ``I7200`` CPU Model (since 5.2) '''''''''''''''''''''''''''''''''''' diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst index 4a0b270296..eb76974347 100644 --- a/docs/about/removed-features.rst +++ b/docs/about/removed-features.rst @@ -355,6 +355,21 @@ The ``-writeconfig`` option was not able to serialize the entire contents of the QEMU command line. It is thus considered a failed experiment and removed without a replacement. +``loaded`` property of ``secret`` and ``secret_keyring`` objects (removed in 7.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``loaded=on`` option in the command line or QMP ``object-add`` either had +no effect (if ``loaded`` was the last option) or caused options to be +effectively ignored as if they were not given. The property is therefore +useless and should simply be removed. + +``opened`` property of ``rng-*`` objects (removed in 7.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``opened=on`` option in the command line or QMP ``object-add`` either had +no effect (if ``opened`` was the last option) or caused errors. The property +is therefore useless and should simply be removed. + QEMU Machine Protocol (QMP) commands ------------------------------------ @@ -557,6 +572,12 @@ Support for this CPU was removed from the upstream Linux kernel, and there is no available upstream toolchain to build binaries for it. Removed without replacement. +x86 ``Icelake-Client`` CPU (removed in 7.1) +''''''''''''''''''''''''''''''''''''''''''' + +There isn't ever Icelake Client CPU, it is some wrong and imaginary one. +Use ``Icelake-Server`` instead. + System emulator machines ------------------------ @@ -633,6 +654,13 @@ tripped up the CI testing and was suspected to be quite broken. For that reason the maintainers strongly suspected no one actually used it. +Creating sound card devices using ``-soundhw`` (removed in 7.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Sound card devices should be created using ``-device`` or ``-audio``. +The exception is ``pcspk`` which can be activated using ``-machine +pcspk-audiodev=<name>``. + TCG introspection features -------------------------- diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst index a50889c556..e1a93df263 100644 --- a/docs/devel/index-internals.rst +++ b/docs/devel/index-internals.rst @@ -18,3 +18,4 @@ Details about QEMU's various subsystems including how to add features to them. tracing vfio-migration writing-monitor-commands + virtio-backends diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst index 7b968433a6..cd9b544376 100644 --- a/docs/devel/qapi-code-gen.rst +++ b/docs/devel/qapi-code-gen.rst @@ -739,10 +739,11 @@ Type names ending with ``Kind`` or ``List`` are reserved for the generator, which uses them for implicit union enums and array types, respectively. -Command names, and member names within a type, should be all lower -case with words separated by a hyphen. However, some existing older -commands and complex types use underscore; when extending them, -consistency is preferred over blindly avoiding underscore. +Command names, member names within a type, and feature names should be +all lower case with words separated by a hyphen. However, some +existing older commands and complex types use underscore; when +extending them, consistency is preferred over blindly avoiding +underscore. Event names should be ALL_CAPS with words separated by underscore. diff --git a/docs/devel/virtio-backends.rst b/docs/devel/virtio-backends.rst new file mode 100644 index 0000000000..9ff092e7a0 --- /dev/null +++ b/docs/devel/virtio-backends.rst @@ -0,0 +1,214 @@ +.. + Copyright (c) 2022, Linaro Limited + Written by Alex Bennée + +Writing VirtIO backends for QEMU +================================ + +This document attempts to outline the information a developer needs to +know to write device emulations in QEMU. It is specifically focused on +implementing VirtIO devices. For VirtIO the frontend is the driver +running on the guest. The backend is the everything that QEMU needs to +do to handle the emulation of the VirtIO device. This can be done +entirely in QEMU, divided between QEMU and the kernel (vhost) or +handled by a separate process which is configured by QEMU +(vhost-user). + +VirtIO Transports +----------------- + +VirtIO supports a number of different transports. While the details of +the configuration and operation of the device will generally be the +same QEMU represents them as different devices depending on the +transport they use. For example -device virtio-foo represents the foo +device using mmio and -device virtio-foo-pci is the same class of +device using the PCI transport. + +Using the QEMU Object Model (QOM) +--------------------------------- + +Generally all devices in QEMU are super classes of ``TYPE_DEVICE`` +however VirtIO devices should be based on ``TYPE_VIRTIO_DEVICE`` which +itself is derived from the base class. For example: + +.. code:: c + + static const TypeInfo virtio_blk_info = { + .name = TYPE_VIRTIO_BLK, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOBlock), + .instance_init = virtio_blk_instance_init, + .class_init = virtio_blk_class_init, + }; + +The author may decide to have a more expansive class hierarchy to +support multiple device types. For example the Virtio GPU device: + +.. code:: c + + static const TypeInfo virtio_gpu_base_info = { + .name = TYPE_VIRTIO_GPU_BASE, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOGPUBase), + .class_size = sizeof(VirtIOGPUBaseClass), + .class_init = virtio_gpu_base_class_init, + .abstract = true + }; + + static const TypeInfo vhost_user_gpu_info = { + .name = TYPE_VHOST_USER_GPU, + .parent = TYPE_VIRTIO_GPU_BASE, + .instance_size = sizeof(VhostUserGPU), + .instance_init = vhost_user_gpu_instance_init, + .instance_finalize = vhost_user_gpu_instance_finalize, + .class_init = vhost_user_gpu_class_init, + }; + + static const TypeInfo virtio_gpu_info = { + .name = TYPE_VIRTIO_GPU, + .parent = TYPE_VIRTIO_GPU_BASE, + .instance_size = sizeof(VirtIOGPU), + .class_size = sizeof(VirtIOGPUClass), + .class_init = virtio_gpu_class_init, + }; + +defines a base class for the VirtIO GPU and then specialises two +versions, one for the internal implementation and the other for the +vhost-user version. + +VirtIOPCIProxy +^^^^^^^^^^^^^^ + +[AJB: the following is supposition and welcomes more informed +opinions] + +Probably due to legacy from the pre-QOM days PCI VirtIO devices don't +follow the normal hierarchy. Instead the a standalone object is based +on the VirtIOPCIProxy class and the specific VirtIO instance is +manually instantiated: + +.. code:: c + + /* + * virtio-blk-pci: This extends VirtioPCIProxy. + */ + #define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base" + DECLARE_INSTANCE_CHECKER(VirtIOBlkPCI, VIRTIO_BLK_PCI, + TYPE_VIRTIO_BLK_PCI) + + struct VirtIOBlkPCI { + VirtIOPCIProxy parent_obj; + VirtIOBlock vdev; + }; + + static Property virtio_blk_pci_properties[] = { + DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), + }; + + static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + { + VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + ... + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); + } + + static void virtio_blk_pci_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + device_class_set_props(dc, virtio_blk_pci_properties); + k->realize = virtio_blk_pci_realize; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI; + } + + static void virtio_blk_pci_instance_init(Object *obj) + { + VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_BLK); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); + } + + static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { + .base_name = TYPE_VIRTIO_BLK_PCI, + .generic_name = "virtio-blk-pci", + .transitional_name = "virtio-blk-pci-transitional", + .non_transitional_name = "virtio-blk-pci-non-transitional", + .instance_size = sizeof(VirtIOBlkPCI), + .instance_init = virtio_blk_pci_instance_init, + .class_init = virtio_blk_pci_class_init, + }; + +Here you can see the instance_init has to manually instantiate the +underlying ``TYPE_VIRTIO_BLOCK`` object and link an alias for one of +it's properties to the PCI device. + + +Back End Implementations +------------------------ + +There are a number of places where the implementation of the backend +can be done: + +* in QEMU itself +* in the host kernel (a.k.a vhost) +* in a separate process (a.k.a. vhost-user) + +vhost_ops vs TYPE_VHOST_USER_BACKEND +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There are two choices to how to implement vhost code. Most of the code +which has to work with either vhost or vhost-user uses +``vhost_dev_init()`` to instantiate the appropriate backend. This +means including a ``struct vhost_dev`` in the main object structure. + +For vhost-user devices you also need to add code to track the +initialisation of the ``chardev`` device used for the control socket +between QEMU and the external vhost-user process. + +If you only need to implement a vhost-user backed the other option is +a use a QOM-ified version of vhost-user. + +.. code:: c + + static void + vhost_user_gpu_instance_init(Object *obj) + { + VhostUserGPU *g = VHOST_USER_GPU(obj); + + g->vhost = VHOST_USER_BACKEND(object_new(TYPE_VHOST_USER_BACKEND)); + object_property_add_alias(obj, "chardev", + OBJECT(g->vhost), "chardev"); + } + + static const TypeInfo vhost_user_gpu_info = { + .name = TYPE_VHOST_USER_GPU, + .parent = TYPE_VIRTIO_GPU_BASE, + .instance_size = sizeof(VhostUserGPU), + .instance_init = vhost_user_gpu_instance_init, + .instance_finalize = vhost_user_gpu_instance_finalize, + .class_init = vhost_user_gpu_class_init, + }; + +Using it this way entails adding a ``struct VhostUserBackend`` to your +core object structure and manually instantiating the backend. This +sub-structure tracks both the ``vhost_dev`` and ``CharDev`` types +needed for the connection. Instead of calling ``vhost_dev_init`` you +would call ``vhost_user_backend_dev_init`` which does what is needed +on your behalf. diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index bdb0f2a41a..f5ca25174a 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports, NBD_CMD_FLAG_FAST_ZERO * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" +* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports diff --git a/docs/interop/vhost-user-gpu.rst b/docs/interop/vhost-user-gpu.rst index 71a2c52b31..1640553729 100644 --- a/docs/interop/vhost-user-gpu.rst +++ b/docs/interop/vhost-user-gpu.rst @@ -13,10 +13,10 @@ Introduction ============ The vhost-user-gpu protocol is aiming at sharing the rendering result -of a virtio-gpu, done from a vhost-user slave process to a vhost-user -master process (such as QEMU). It bears a resemblance to a display +of a virtio-gpu, done from a vhost-user back-end process to a vhost-user +front-end process (such as QEMU). It bears a resemblance to a display server protocol, if you consider QEMU as the display server and the -slave as the client, but in a very limited way. Typically, it will +back-end as the client, but in a very limited way. Typically, it will work by setting a scanout/display configuration, before sending flush events for the display updates. It will also update the cursor shape and position. @@ -26,8 +26,8 @@ socket ancillary data to share opened file descriptors (DMABUF fds or shared memory). The socket is usually obtained via ``VHOST_USER_GPU_SET_SOCKET``. -Requests are sent by the *slave*, and the optional replies by the -*master*. +Requests are sent by the *back-end*, and the optional replies by the +*front-end*. Wire format =========== diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index f9e721ba5f..a99ba4433c 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -23,19 +23,19 @@ space process on the same host. It uses communication over a Unix domain socket to share file descriptors in the ancillary data of the message. -The protocol defines 2 sides of the communication, *master* and -*slave*. *Master* is the application that shares its virtqueues, in -our case QEMU. *Slave* is the consumer of the virtqueues. +The protocol defines 2 sides of the communication, *front-end* and +*back-end*. The *front-end* is the application that shares its virtqueues, in +our case QEMU. The *back-end* is the consumer of the virtqueues. -In the current implementation QEMU is the *master*, and the *slave* is -the external process consuming the virtio queues, for example a +In the current implementation QEMU is the *front-end*, and the *back-end* +is the external process consuming the virtio queues, for example a software Ethernet switch running in user space, such as Snabbswitch, -or a block device backend processing read & write to a virtual -disk. In order to facilitate interoperability between various backend +or a block device back-end processing read & write to a virtual +disk. In order to facilitate interoperability between various back-end implementations, it is recommended to follow the :ref:`Backend program conventions <backend_conventions>`. -*Master* and *slave* can be either a client (i.e. connecting) or +The *front-end* and *back-end* can be either a client (i.e. connecting) or server (listening) in the socket communication. Support for platforms other than Linux @@ -77,7 +77,7 @@ Header :flags: 32-bit bit field - Lower 2 bits are the version (currently 0x01) -- Bit 2 is the reply flag - needs to be sent on each reply from the slave +- Bit 2 is the reply flag - needs to be sent on each reply from the back-end - Bit 3 is the need_reply flag - see :ref:`REPLY_ACK <reply_ack>` for details. @@ -222,8 +222,8 @@ Virtio device config space :size: a 32-bit configuration space access size in bytes :flags: a 32-bit value: - - 0: Vhost master messages used for writeable fields - - 1: Vhost master messages used for live migration + - 0: Vhost front-end messages used for writeable fields + - 1: Vhost front-end messages used for live migration :payload: Size bytes array holding the contents of the virtio device's configuration space @@ -290,8 +290,8 @@ vhost for the Linux Kernel. Most messages that can be sent via the Unix domain socket implementing vhost-user have an equivalent ioctl to the kernel implementation. -The communication consists of *master* sending message requests and -*slave* sending message replies. Most of the requests don't require +The communication consists of the *front-end* sending message requests and +the *back-end* sending message replies. Most of the requests don't require replies. Here is a list of the ones that do: * ``VHOST_USER_GET_FEATURES`` @@ -305,7 +305,7 @@ replies. Here is a list of the ones that do: :ref:`REPLY_ACK <reply_ack>` The section on ``REPLY_ACK`` protocol extension. -There are several messages that the master sends with file descriptors passed +There are several messages that the front-end sends with file descriptors passed in the ancillary data: * ``VHOST_USER_ADD_MEM_REG`` @@ -318,100 +318,108 @@ in the ancillary data: * ``VHOST_USER_SET_SLAVE_REQ_FD`` * ``VHOST_USER_SET_INFLIGHT_FD`` (if ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD``) -If *master* is unable to send the full message or receives a wrong +If *front-end* is unable to send the full message or receives a wrong reply it will close the connection. An optional reconnection mechanism can be implemented. -If *slave* detects some error such as incompatible features, it may also +If *back-end* detects some error such as incompatible features, it may also close the connection. This should only happen in exceptional circumstances. Any protocol extensions are gated by protocol feature bits, which -allows full backwards compatibility on both master and slave. As -older slaves don't support negotiating protocol features, a feature +allows full backwards compatibility on both front-end and back-end. As +older back-ends don't support negotiating protocol features, a feature bit was dedicated for this purpose:: #define VHOST_USER_F_PROTOCOL_FEATURES 30 -Starting and stopping rings ---------------------------- +Note that VHOST_USER_F_PROTOCOL_FEATURES is the UNUSED (30) feature +bit defined in `VIRTIO 1.1 6.3 Legacy Interface: Reserved Feature Bits +<https://docs.oasis-open.org/virtio/virtio/v1.1/cs01/virtio-v1.1-cs01.html#x1-4130003>`_. +VIRTIO devices do not advertise this feature bit and therefore VIRTIO +drivers cannot negotiate it. -Client must only process each ring when it is started. +This reserved feature bit was reused by the vhost-user protocol to add +vhost-user protocol feature negotiation in a backwards compatible +fashion. Old vhost-user front-end and back-end implementations continue to +work even though they are not aware of vhost-user protocol feature +negotiation. -Client must only pass data between the ring and the backend, when the -ring is enabled. +Ring states +----------- -If ring is started but disabled, client must process the ring without -talking to the backend. +Rings can be in one of three states: -For example, for a networking device, in the disabled state client -must not supply any new RX packets, but must process and discard any -TX packets. +* stopped: the back-end must not process the ring at all. -If ``VHOST_USER_F_PROTOCOL_FEATURES`` has not been negotiated, the -ring is initialized in an enabled state. +* started but disabled: the back-end must process the ring without + causing any side effects. For example, for a networking device, + in the disabled state the back-end must not supply any new RX packets, + but must process and discard any TX packets. -If ``VHOST_USER_F_PROTOCOL_FEATURES`` has been negotiated, the ring is -initialized in a disabled state. Client must not pass data to/from the -backend until ring is enabled by ``VHOST_USER_SET_VRING_ENABLE`` with -parameter 1, or after it has been disabled by -``VHOST_USER_SET_VRING_ENABLE`` with parameter 0. +* started and enabled. -Each ring is initialized in a stopped state, client must not process -it until ring is started, or after it has been stopped. +Each ring is initialized in a stopped state. The back-end must start +ring upon receiving a kick (that is, detecting that file descriptor is +readable) on the descriptor specified by ``VHOST_USER_SET_VRING_KICK`` +or receiving the in-band message ``VHOST_USER_VRING_KICK`` if negotiated, +and stop ring upon receiving ``VHOST_USER_GET_VRING_BASE``. -Client must start ring upon receiving a kick (that is, detecting that -file descriptor is readable) on the descriptor specified by -``VHOST_USER_SET_VRING_KICK`` or receiving the in-band message -``VHOST_USER_VRING_KICK`` if negotiated, and stop ring upon receiving -``VHOST_USER_GET_VRING_BASE``. +Rings can be enabled or disabled by ``VHOST_USER_SET_VRING_ENABLE``. -While processing the rings (whether they are enabled or not), client +If ``VHOST_USER_F_PROTOCOL_FEATURES`` has not been negotiated, the +ring starts directly in the enabled state. + +If ``VHOST_USER_F_PROTOCOL_FEATURES`` has been negotiated, the ring is +initialized in a disabled state and is enabled by +``VHOST_USER_SET_VRING_ENABLE`` with parameter 1. + +While processing the rings (whether they are enabled or not), the back-end must support changing some configuration aspects on the fly. Multiple queue support ---------------------- -Many devices have a fixed number of virtqueues. In this case the master +Many devices have a fixed number of virtqueues. In this case the front-end already knows the number of available virtqueues without communicating with the -slave. +back-end. Some devices do not have a fixed number of virtqueues. Instead the maximum -number of virtqueues is chosen by the slave. The number can depend on host -resource availability or slave implementation details. Such devices are called +number of virtqueues is chosen by the back-end. The number can depend on host +resource availability or back-end implementation details. Such devices are called multiple queue devices. -Multiple queue support allows the slave to advertise the maximum number of -queues. This is treated as a protocol extension, hence the slave has to +Multiple queue support allows the back-end to advertise the maximum number of +queues. This is treated as a protocol extension, hence the back-end has to implement protocol features first. The multiple queues feature is supported only when the protocol feature ``VHOST_USER_PROTOCOL_F_MQ`` (bit 0) is set. -The max number of queues the slave supports can be queried with message -``VHOST_USER_GET_QUEUE_NUM``. Master should stop when the number of requested +The max number of queues the back-end supports can be queried with message +``VHOST_USER_GET_QUEUE_NUM``. Front-end should stop when the number of requested queues is bigger than that. -As all queues share one connection, the master uses a unique index for each +As all queues share one connection, the front-end uses a unique index for each queue in the sent message to identify a specified queue. -The master enables queues by sending message ``VHOST_USER_SET_VRING_ENABLE``. +The front-end enables queues by sending message ``VHOST_USER_SET_VRING_ENABLE``. vhost-user-net has historically automatically enabled the first queue pair. -Slaves should always implement the ``VHOST_USER_PROTOCOL_F_MQ`` protocol +Back-ends should always implement the ``VHOST_USER_PROTOCOL_F_MQ`` protocol feature, even for devices with a fixed number of virtqueues, since it is simple to implement and offers a degree of introspection. -Masters must not rely on the ``VHOST_USER_PROTOCOL_F_MQ`` protocol feature for +Front-ends must not rely on the ``VHOST_USER_PROTOCOL_F_MQ`` protocol feature for devices with a fixed number of virtqueues. Only true multiqueue devices require this protocol feature. Migration --------- -During live migration, the master may need to track the modifications -the slave makes to the memory mapped regions. The client should mark +During live migration, the front-end may need to track the modifications +the back-end makes to the memory mapped regions. The front-end should mark the dirty pages in a log. Once it complies to this logging, it may declare the ``VHOST_F_LOG_ALL`` vhost feature. -To start/stop logging of data/used ring writes, server may send +To start/stop logging of data/used ring writes, the front-end may send messages ``VHOST_USER_SET_FEATURES`` with ``VHOST_F_LOG_ALL`` and ``VHOST_USER_SET_VRING_ADDR`` with ``VHOST_VRING_F_LOG`` in ring's flags set to 1/0, respectively. @@ -425,7 +433,7 @@ Dirty pages are of size:: #define VHOST_LOG_PAGE 0x1000 The log memory fd is provided in the ancillary data of -``VHOST_USER_SET_LOG_BASE`` message when the slave has +``VHOST_USER_SET_LOG_BASE`` message when the back-end has ``VHOST_USER_PROTOCOL_F_LOG_SHMFD`` protocol feature. The size of the log is supplied as part of ``VhostUserMsg`` which @@ -451,26 +459,26 @@ the bit offset of the last byte of the ring must fall within the size supplied by ``VhostUserLog``. ``VHOST_USER_SET_LOG_FD`` is an optional message with an eventfd in -ancillary data, it may be used to inform the master that the log has +ancillary data, it may be used to inform the front-end that the log has been modified. Once the source has finished migration, rings will be stopped by the source. No further update must be done before rings are restarted. -In postcopy migration the slave is started before all the memory has +In postcopy migration the back-end is started before all the memory has been received from the source host, and care must be taken to avoid -accessing pages that have yet to be received. The slave opens a +accessing pages that have yet to be received. The back-end opens a 'userfault'-fd and registers the memory with it; this fd is then -passed back over to the master. The master services requests on the +passed back over to the front-end. The front-end services requests on the userfaultfd for pages that are accessed and when the page is available it performs WAKE ioctl's on the userfaultfd to wake the stalled -slave. The client indicates support for this via the +back-end. The front-end indicates support for this via the ``VHOST_USER_PROTOCOL_F_PAGEFAULT`` feature. Memory access ------------- -The master sends a list of vhost memory regions to the slave using the +The front-end sends a list of vhost memory regions to the back-end using the ``VHOST_USER_SET_MEM_TABLE`` message. Each region has two base addresses: a guest address and a user address. @@ -495,60 +503,60 @@ IOMMU support ------------- When the ``VIRTIO_F_IOMMU_PLATFORM`` feature has been negotiated, the -master sends IOTLB entries update & invalidation by sending -``VHOST_USER_IOTLB_MSG`` requests to the slave with a ``struct +front-end sends IOTLB entries update & invalidation by sending +``VHOST_USER_IOTLB_MSG`` requests to the back-end with a ``struct vhost_iotlb_msg`` as payload. For update events, the ``iotlb`` payload has to be filled with the update message type (2), the I/O virtual address, the size, the user virtual address, and the permissions flags. Addresses and size must be within vhost memory regions set via the ``VHOST_USER_SET_MEM_TABLE`` request. For invalidation events, the ``iotlb`` payload has to be filled with the invalidation message type -(3), the I/O virtual address and the size. On success, the slave is +(3), the I/O virtual address and the size. On success, the back-end is expected to reply with a zero payload, non-zero otherwise. -The slave relies on the slave communication channel (see :ref:`Slave -communication <slave_communication>` section below) to send IOTLB miss +The back-end relies on the back-end communication channel (see :ref:`Back-end +communication <backend_communication>` section below) to send IOTLB miss and access failure events, by sending ``VHOST_USER_SLAVE_IOTLB_MSG`` -requests to the master with a ``struct vhost_iotlb_msg`` as +requests to the front-end with a ``struct vhost_iotlb_msg`` as payload. For miss events, the iotlb payload has to be filled with the miss message type (1), the I/O virtual address and the permissions flags. For access failure event, the iotlb payload has to be filled with the access failure message type (4), the I/O virtual address and -the permissions flags. For synchronization purpose, the slave may -rely on the reply-ack feature, so the master may send a reply when +the permissions flags. For synchronization purpose, the back-end may +rely on the reply-ack feature, so the front-end may send a reply when operation is completed if the reply-ack feature is negotiated and -slaves requests a reply. For miss events, completed operation means -either master sent an update message containing the IOTLB entry -containing requested address and permission, or master sent nothing if +back-ends requests a reply. For miss events, completed operation means +either front-end sent an update message containing the IOTLB entry +containing requested address and permission, or front-end sent nothing if the IOTLB miss message is invalid (invalid IOVA or permission). -The master isn't expected to take the initiative to send IOTLB update -messages, as the slave sends IOTLB miss messages for the guest virtual +The front-end isn't expected to take the initiative to send IOTLB update +messages, as the back-end sends IOTLB miss messages for the guest virtual memory areas it needs to access. -.. _slave_communication: +.. _backend_communication: -Slave communication -------------------- +Back-end communication +---------------------- -An optional communication channel is provided if the slave declares +An optional communication channel is provided if the back-end declares ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` protocol feature, to allow the -slave to make requests to the master. +back-end to make requests to the front-end. The fd is provided via ``VHOST_USER_SET_SLAVE_REQ_FD`` ancillary data. -A slave may then send ``VHOST_USER_SLAVE_*`` messages to the master +A back-end may then send ``VHOST_USER_SLAVE_*`` messages to the front-end using this fd communication channel. If ``VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD`` protocol feature is -negotiated, slave can send file descriptors (at most 8 descriptors in -each message) to master via ancillary data using this fd communication +negotiated, back-end can send file descriptors (at most 8 descriptors in +each message) to front-end via ancillary data using this fd communication channel. Inflight I/O tracking --------------------- -To support reconnecting after restart or crash, slave may need to +To support reconnecting after restart or crash, back-end may need to resubmit inflight I/Os. If virtqueue is processed in order, we can easily achieve that by getting the inflight descriptors from descriptor table (split virtqueue) or descriptor ring (packed @@ -556,18 +564,18 @@ virtqueue). However, it can't work when we process descriptors out-of-order because some entries which store the information of inflight descriptors in available ring (split virtqueue) or descriptor ring (packed virtqueue) might be overridden by new entries. To solve -this problem, slave need to allocate an extra buffer to store this -information of inflight descriptors and share it with master for +this problem, the back-end need to allocate an extra buffer to store this +information of inflight descriptors and share it with front-end for persistent. ``VHOST_USER_GET_INFLIGHT_FD`` and ``VHOST_USER_SET_INFLIGHT_FD`` are used to transfer this buffer -between master and slave. And the format of this buffer is described +between front-end and back-end. And the format of this buffer is described below: +---------------+---------------+-----+---------------+ | queue0 region | queue1 region | ... | queueN region | +---------------+---------------+-----+---------------+ -N is the number of available virtqueues. Slave could get it from num +N is the number of available virtqueues. The back-end could get it from num queues field of ``VhostUserInflight``. For split virtqueue, queue region can be implemented as: @@ -599,8 +607,8 @@ For split virtqueue, queue region can be implemented as: * Zero value indicates an uninitialized buffer */ uint16_t version; - /* The size of DescStateSplit array. It's equal to the virtqueue - * size. Slave could get it from queue size field of VhostUserInflight. */ + /* The size of DescStateSplit array. It's equal to the virtqueue size. + * The back-end could get it from queue size field of VhostUserInflight. */ uint16_t desc_num; /* The head of list that track the last batch of used descriptors. */ @@ -706,8 +714,8 @@ For packed virtqueue, queue region can be implemented as: * Zero value indicates an uninitialized buffer */ uint16_t version; - /* The size of DescStatePacked array. It's equal to the virtqueue - * size. Slave could get it from queue size field of VhostUserInflight. */ + /* The size of DescStatePacked array. It's equal to the virtqueue size. + * The back-end could get it from queue size field of VhostUserInflight. */ uint16_t desc_num; /* The head of free DescStatePacked entry list */ @@ -799,7 +807,7 @@ When reconnecting: #. Use ``old_used_wrap_counter`` to calculate the available flags #. If ``d.flags`` is not equal to the calculated flags value (means - slave has submitted the buffer to guest driver before crash, so + back-end has submitted the buffer to guest driver before crash, so it has to commit the in-progres update), set ``old_free_head``, ``old_used_idx``, ``old_used_wrap_counter`` to ``free_head``, ``used_idx``, ``used_wrap_counter`` @@ -828,11 +836,11 @@ cause the sending application(s) to block, it is not advised to use this feature unless absolutely necessary. It is also considered an error to negotiate this feature without also negotiating ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` and ``VHOST_USER_PROTOCOL_F_REPLY_ACK``, -the former is necessary for getting a message channel from the slave -to the master, while the latter needs to be used with the in-band +the former is necessary for getting a message channel from the back-end +to the front-end, while the latter needs to be used with the in-band notification messages to block until they are processed, both to avoid blocking later and for proper processing (at least in the simulation -use case.) As it has no other way of signalling this error, the slave +use case.) As it has no other way of signalling this error, the back-end should close the connection as a response to a ``VHOST_USER_SET_PROTOCOL_FEATURES`` message that sets the in-band notifications feature flag without the other two. @@ -860,95 +868,101 @@ Protocol features #define VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS 15 #define VHOST_USER_PROTOCOL_F_STATUS 16 -Master message types --------------------- +Front-end message types +----------------------- ``VHOST_USER_GET_FEATURES`` :id: 1 :equivalent ioctl: ``VHOST_GET_FEATURES`` - :master payload: N/A - :slave payload: ``u64`` + :request payload: N/A + :reply payload: ``u64`` Get from the underlying vhost implementation the features bitmask. - Feature bit ``VHOST_USER_F_PROTOCOL_FEATURES`` signals slave support + Feature bit ``VHOST_USER_F_PROTOCOL_FEATURES`` signals back-end support for ``VHOST_USER_GET_PROTOCOL_FEATURES`` and ``VHOST_USER_SET_PROTOCOL_FEATURES``. ``VHOST_USER_SET_FEATURES`` :id: 2 :equivalent ioctl: ``VHOST_SET_FEATURES`` - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A Enable features in the underlying vhost implementation using a bitmask. Feature bit ``VHOST_USER_F_PROTOCOL_FEATURES`` signals - slave support for ``VHOST_USER_GET_PROTOCOL_FEATURES`` and + back-end support for ``VHOST_USER_GET_PROTOCOL_FEATURES`` and ``VHOST_USER_SET_PROTOCOL_FEATURES``. ``VHOST_USER_GET_PROTOCOL_FEATURES`` :id: 15 :equivalent ioctl: ``VHOST_GET_FEATURES`` - :master payload: N/A - :slave payload: ``u64`` + :request payload: N/A + :reply payload: ``u64`` Get the protocol feature bitmask from the underlying vhost implementation. Only legal if feature bit ``VHOST_USER_F_PROTOCOL_FEATURES`` is present in - ``VHOST_USER_GET_FEATURES``. + ``VHOST_USER_GET_FEATURES``. It does not need to be acknowledged by + ``VHOST_USER_SET_FEATURES``. .. Note:: - Slave that reported ``VHOST_USER_F_PROTOCOL_FEATURES`` must + Back-ends that report ``VHOST_USER_F_PROTOCOL_FEATURES`` must support this message even before ``VHOST_USER_SET_FEATURES`` was called. ``VHOST_USER_SET_PROTOCOL_FEATURES`` :id: 16 :equivalent ioctl: ``VHOST_SET_FEATURES`` - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A Enable protocol features in the underlying vhost implementation. Only legal if feature bit ``VHOST_USER_F_PROTOCOL_FEATURES`` is present in - ``VHOST_USER_GET_FEATURES``. + ``VHOST_USER_GET_FEATURES``. It does not need to be acknowledged by + ``VHOST_USER_SET_FEATURES``. .. Note:: - Slave that reported ``VHOST_USER_F_PROTOCOL_FEATURES`` must support + Back-ends that report ``VHOST_USER_F_PROTOCOL_FEATURES`` must support this message even before ``VHOST_USER_SET_FEATURES`` was called. ``VHOST_USER_SET_OWNER`` :id: 3 :equivalent ioctl: ``VHOST_SET_OWNER`` - :master payload: N/A + :request payload: N/A + :reply payload: N/A - Issued when a new connection is established. It sets the current - *master* as an owner of the session. This can be used on the *slave* + Issued when a new connection is established. It marks the sender + as the front-end that owns of the session. This can be used on the *back-end* as a "session start" flag. ``VHOST_USER_RESET_OWNER`` :id: 4 - :master payload: N/A + :request payload: N/A + :reply payload: N/A .. admonition:: Deprecated This is no longer used. Used to be sent to request disabling all - rings, but some clients interpreted it to also discard connection + rings, but some back-ends interpreted it to also discard connection state (this interpretation would lead to bugs). It is recommended - that clients either ignore this message, or use it to disable all + that back-ends either ignore this message, or use it to disable all rings. ``VHOST_USER_SET_MEM_TABLE`` :id: 5 :equivalent ioctl: ``VHOST_SET_MEM_TABLE`` - :master payload: memory regions description - :slave payload: (postcopy only) memory regions description + :request payload: memory regions description + :reply payload: (postcopy only) memory regions description - Sets the memory map regions on the slave so it can translate the + Sets the memory map regions on the back-end so it can translate the vring addresses. In the ancillary data there is an array of file descriptors for each memory mapped region. The size and ordering of the fds matches the number and ordering of memory regions. When ``VHOST_USER_POSTCOPY_LISTEN`` has been received, ``SET_MEM_TABLE`` replies with the bases of the memory mapped - regions to the master. The slave must have mmap'd the regions but + regions to the front-end. The back-end must have mmap'd the regions but not yet accessed them and should not yet generate a userfault event. @@ -962,12 +976,12 @@ Master message types ``VHOST_USER_SET_LOG_BASE`` :id: 6 :equivalent ioctl: ``VHOST_SET_LOG_BASE`` - :master payload: u64 - :slave payload: N/A + :request payload: u64 + :reply payload: N/A Sets logging shared memory space. - When slave has ``VHOST_USER_PROTOCOL_F_LOG_SHMFD`` protocol feature, + When the back-end has ``VHOST_USER_PROTOCOL_F_LOG_SHMFD`` protocol feature, the log memory fd is provided in the ancillary data of ``VHOST_USER_SET_LOG_BASE`` message, the size and offset of shared memory area provided in the message. @@ -975,44 +989,48 @@ Master message types ``VHOST_USER_SET_LOG_FD`` :id: 7 :equivalent ioctl: ``VHOST_SET_LOG_FD`` - :master payload: N/A + :request payload: N/A + :reply payload: N/A Sets the logging file descriptor, which is passed as ancillary data. ``VHOST_USER_SET_VRING_NUM`` :id: 8 :equivalent ioctl: ``VHOST_SET_VRING_NUM`` - :master payload: vring state description + :request payload: vring state description + :reply payload: N/A Set the size of the queue. ``VHOST_USER_SET_VRING_ADDR`` :id: 9 :equivalent ioctl: ``VHOST_SET_VRING_ADDR`` - :master payload: vring address description - :slave payload: N/A + :request payload: vring address description + :reply payload: N/A Sets the addresses of the different aspects of the vring. ``VHOST_USER_SET_VRING_BASE`` :id: 10 :equivalent ioctl: ``VHOST_SET_VRING_BASE`` - :master payload: vring state description + :request payload: vring state description + :reply payload: N/A Sets the base offset in the available vring. ``VHOST_USER_GET_VRING_BASE`` :id: 11 :equivalent ioctl: ``VHOST_USER_GET_VRING_BASE`` - :master payload: vring state description - :slave payload: vring state description + :request payload: vring state description + :reply payload: vring state description Get the available vring base offset. ``VHOST_USER_SET_VRING_KICK`` :id: 12 :equivalent ioctl: ``VHOST_SET_VRING_KICK`` - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A Set the event file descriptor for adding buffers to the vring. It is passed in the ancillary data. @@ -1030,7 +1048,8 @@ Master message types ``VHOST_USER_SET_VRING_CALL`` :id: 13 :equivalent ioctl: ``VHOST_SET_VRING_CALL`` - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A Set the event file descriptor to signal when buffers are used. It is passed in the ancillary data. @@ -1048,7 +1067,8 @@ Master message types ``VHOST_USER_SET_VRING_ERR`` :id: 14 :equivalent ioctl: ``VHOST_SET_VRING_ERR`` - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A Set the event file descriptor to signal when error occurs. It is passed in the ancillary data. @@ -1065,10 +1085,10 @@ Master message types ``VHOST_USER_GET_QUEUE_NUM`` :id: 17 :equivalent ioctl: N/A - :master payload: N/A - :slave payload: u64 + :request payload: N/A + :reply payload: u64 - Query how many queues the backend supports. + Query how many queues the back-end supports. This request should be sent only when ``VHOST_USER_PROTOCOL_F_MQ`` is set in queried protocol features by @@ -1077,9 +1097,10 @@ Master message types ``VHOST_USER_SET_VRING_ENABLE`` :id: 18 :equivalent ioctl: N/A - :master payload: vring state description + :request payload: vring state description + :reply payload: N/A - Signal slave to enable or disable corresponding vring. + Signal the back-end to enable or disable corresponding vring. This request should be sent only when ``VHOST_USER_F_PROTOCOL_FEATURES`` has been negotiated. @@ -1087,9 +1108,10 @@ Master message types ``VHOST_USER_SEND_RARP`` :id: 19 :equivalent ioctl: N/A - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A - Ask vhost user backend to broadcast a fake RARP to notify the migration + Ask vhost user back-end to broadcast a fake RARP to notify the migration is terminated for guest that does not support GUEST_ANNOUNCE. Only legal if feature bit ``VHOST_USER_F_PROTOCOL_FEATURES`` is @@ -1097,12 +1119,13 @@ Master message types ``VHOST_USER_PROTOCOL_F_RARP`` is present in ``VHOST_USER_GET_PROTOCOL_FEATURES``. The first 6 bytes of the payload contain the mac address of the guest to allow the vhost user - backend to construct and broadcast the fake RARP. + back-end to construct and broadcast the fake RARP. ``VHOST_USER_NET_SET_MTU`` :id: 20 :equivalent ioctl: N/A - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A Set host MTU value exposed to the guest. @@ -1112,35 +1135,36 @@ Master message types ``VHOST_USER_PROTOCOL_F_NET_MTU`` is present in ``VHOST_USER_GET_PROTOCOL_FEATURES``. - If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, slave must + If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, the back-end must respond with zero in case the specified MTU is valid, or non-zero otherwise. ``VHOST_USER_SET_SLAVE_REQ_FD`` :id: 21 :equivalent ioctl: N/A - :master payload: N/A + :request payload: N/A + :reply payload: N/A - Set the socket file descriptor for slave initiated requests. It is passed + Set the socket file descriptor for back-end initiated requests. It is passed in the ancillary data. This request should be sent only when ``VHOST_USER_F_PROTOCOL_FEATURES`` has been negotiated, and protocol feature bit ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` bit is present in ``VHOST_USER_GET_PROTOCOL_FEATURES``. If - ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, slave must + ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, the back-end must respond with zero for success, non-zero otherwise. ``VHOST_USER_IOTLB_MSG`` :id: 22 :equivalent ioctl: N/A (equivalent to ``VHOST_IOTLB_MSG`` message type) - :master payload: ``struct vhost_iotlb_msg`` - :slave payload: ``u64`` + :request payload: ``struct vhost_iotlb_msg`` + :reply payload: ``u64`` Send IOTLB messages with ``struct vhost_iotlb_msg`` as payload. - Master sends such requests to update and invalidate entries in the - device IOTLB. The slave has to acknowledge the request with sending + The front-end sends such requests to update and invalidate entries in the + device IOTLB. The back-end has to acknowledge the request with sending zero as ``u64`` payload for success, non-zero otherwise. This request should be send only when ``VIRTIO_F_IOMMU_PLATFORM`` @@ -1149,7 +1173,8 @@ Master message types ``VHOST_USER_SET_VRING_ENDIAN`` :id: 23 :equivalent ioctl: ``VHOST_SET_VRING_ENDIAN`` - :master payload: vring state description + :request payload: vring state description + :reply payload: N/A Set the endianness of a VQ for legacy devices. Little-endian is indicated with state.num set to 0 and big-endian is indicated with @@ -1159,42 +1184,42 @@ Master message types ``VHOST_USER_PROTOCOL_F_CROSS_ENDIAN`` has been negotiated. Backends that negotiated this feature should handle both endiannesses and expect this message once (per VQ) during device - configuration (ie. before the master starts the VQ). + configuration (ie. before the front-end starts the VQ). ``VHOST_USER_GET_CONFIG`` :id: 24 :equivalent ioctl: N/A - :master payload: virtio device config space - :slave payload: virtio device config space + :request payload: virtio device config space + :reply payload: virtio device config space When ``VHOST_USER_PROTOCOL_F_CONFIG`` is negotiated, this message is - submitted by the vhost-user master to fetch the contents of the - virtio device configuration space, vhost-user slave's payload size - MUST match master's request, vhost-user slave uses zero length of - payload to indicate an error to vhost-user master. The vhost-user - master may cache the contents to avoid repeated + submitted by the vhost-user front-end to fetch the contents of the + virtio device configuration space, vhost-user back-end's payload size + MUST match the front-end's request, vhost-user back-end uses zero length of + payload to indicate an error to the vhost-user front-end. The vhost-user + front-end may cache the contents to avoid repeated ``VHOST_USER_GET_CONFIG`` calls. ``VHOST_USER_SET_CONFIG`` :id: 25 :equivalent ioctl: N/A - :master payload: virtio device config space - :slave payload: N/A + :request payload: virtio device config space + :reply payload: N/A When ``VHOST_USER_PROTOCOL_F_CONFIG`` is negotiated, this message is - submitted by the vhost-user master when the Guest changes the virtio + submitted by the vhost-user front-end when the Guest changes the virtio device configuration space and also can be used for live migration - on the destination host. The vhost-user slave must check the flags - field, and slaves MUST NOT accept SET_CONFIG for read-only + on the destination host. The vhost-user back-end must check the flags + field, and back-ends MUST NOT accept SET_CONFIG for read-only configuration space fields unless the live migration bit is set. ``VHOST_USER_CREATE_CRYPTO_SESSION`` :id: 26 :equivalent ioctl: N/A - :master payload: crypto session description - :slave payload: crypto session description + :request payload: crypto session description + :reply payload: crypto session description - Create a session for crypto operation. The server side must return + Create a session for crypto operation. The back-end must return the session id, 0 or positive for success, negative for failure. This request should be sent only when ``VHOST_USER_PROTOCOL_F_CRYPTO_SESSION`` feature has been @@ -1204,7 +1229,8 @@ Master message types ``VHOST_USER_CLOSE_CRYPTO_SESSION`` :id: 27 :equivalent ioctl: N/A - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A Close a session for crypto operation which was previously created by ``VHOST_USER_CREATE_CRYPTO_SESSION``. @@ -1216,20 +1242,21 @@ Master message types ``VHOST_USER_POSTCOPY_ADVISE`` :id: 28 - :master payload: N/A - :slave payload: userfault fd + :request payload: N/A + :reply payload: userfault fd - When ``VHOST_USER_PROTOCOL_F_PAGEFAULT`` is supported, the master - advises slave that a migration with postcopy enabled is underway, - the slave must open a userfaultfd for later use. Note that at this + When ``VHOST_USER_PROTOCOL_F_PAGEFAULT`` is supported, the front-end + advises back-end that a migration with postcopy enabled is underway, + the back-end must open a userfaultfd for later use. Note that at this stage the migration is still in precopy mode. ``VHOST_USER_POSTCOPY_LISTEN`` :id: 29 - :master payload: N/A + :request payload: N/A + :reply payload: N/A - Master advises slave that a transition to postcopy mode has - happened. The slave must ensure that shared memory is registered + The front-end advises back-end that a transition to postcopy mode has + happened. The back-end must ensure that shared memory is registered with userfaultfd to cause faulting of non-present pages. This is always sent sometime after a ``VHOST_USER_POSTCOPY_ADVISE``, @@ -1237,10 +1264,11 @@ Master message types ``VHOST_USER_POSTCOPY_END`` :id: 30 - :slave payload: ``u64`` + :request payload: N/A + :reply payload: ``u64`` - Master advises that postcopy migration has now completed. The slave - must disable the userfaultfd. The response is an acknowledgement + The front-end advises that postcopy migration has now completed. The back-end + must disable the userfaultfd. The reply is an acknowledgement only. When ``VHOST_USER_PROTOCOL_F_PAGEFAULT`` is supported, this message @@ -1252,156 +1280,181 @@ Master message types ``VHOST_USER_GET_INFLIGHT_FD`` :id: 31 :equivalent ioctl: N/A - :master payload: inflight description + :request payload: inflight description + :reply payload: N/A When ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD`` protocol feature has - been successfully negotiated, this message is submitted by master to - get a shared buffer from slave. The shared buffer will be used to - track inflight I/O by slave. QEMU should retrieve a new one when vm + been successfully negotiated, this message is submitted by the front-end to + get a shared buffer from back-end. The shared buffer will be used to + track inflight I/O by back-end. QEMU should retrieve a new one when vm reset. ``VHOST_USER_SET_INFLIGHT_FD`` :id: 32 :equivalent ioctl: N/A - :master payload: inflight description + :request payload: inflight description + :reply payload: N/A When ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD`` protocol feature has - been successfully negotiated, this message is submitted by master to - send the shared inflight buffer back to slave so that slave could - get inflight I/O after a crash or restart. + been successfully negotiated, this message is submitted by the front-end to + send the shared inflight buffer back to the back-end so that the back-end + could get inflight I/O after a crash or restart. ``VHOST_USER_GPU_SET_SOCKET`` :id: 33 :equivalent ioctl: N/A - :master payload: N/A + :request payload: N/A + :reply payload: N/A Sets the GPU protocol socket file descriptor, which is passed as - ancillary data. The GPU protocol is used to inform the master of + ancillary data. The GPU protocol is used to inform the front-end of rendering state and updates. See vhost-user-gpu.rst for details. ``VHOST_USER_RESET_DEVICE`` :id: 34 :equivalent ioctl: N/A - :master payload: N/A - :slave payload: N/A + :request payload: N/A + :reply payload: N/A - Ask the vhost user backend to disable all rings and reset all + Ask the vhost user back-end to disable all rings and reset all internal device state to the initial state, ready to be - reinitialized. The backend retains ownership of the device + reinitialized. The back-end retains ownership of the device throughout the reset operation. Only valid if the ``VHOST_USER_PROTOCOL_F_RESET_DEVICE`` protocol - feature is set by the backend. + feature is set by the back-end. ``VHOST_USER_VRING_KICK`` :id: 35 :equivalent ioctl: N/A - :slave payload: vring state description - :master payload: N/A + :request payload: vring state description + :reply payload: N/A When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol feature has been successfully negotiated, this message may be - submitted by the master to indicate that a buffer was added to + submitted by the front-end to indicate that a buffer was added to the vring instead of signalling it using the vring's kick file - descriptor or having the slave rely on polling. + descriptor or having the back-end rely on polling. The state.num field is currently reserved and must be set to 0. ``VHOST_USER_GET_MAX_MEM_SLOTS`` :id: 36 :equivalent ioctl: N/A - :slave payload: u64 + :request payload: N/A + :reply payload: u64 When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol feature has been successfully negotiated, this message is submitted - by master to the slave. The slave should return the message with a + by the front-end to the back-end. The back-end should return the message with a u64 payload containing the maximum number of memory slots for - QEMU to expose to the guest. The value returned by the backend + QEMU to expose to the guest. The value returned by the back-end will be capped at the maximum number of ram slots which can be supported by the target platform. ``VHOST_USER_ADD_MEM_REG`` :id: 37 :equivalent ioctl: N/A - :slave payload: single memory region description + :request payload: N/A + :reply payload: single memory region description When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol feature has been successfully negotiated, this message is submitted - by the master to the slave. The message payload contains a memory + by the front-end to the back-end. The message payload contains a memory region descriptor struct, describing a region of guest memory which - the slave device must map in. When the + the back-end device must map in. When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol feature has been successfully negotiated, along with the ``VHOST_USER_REM_MEM_REG`` message, this message is used to set and - update the memory tables of the slave device. + update the memory tables of the back-end device. + + Exactly one file descriptor from which the memory is mapped is + passed in the ancillary data. + + In postcopy mode (see ``VHOST_USER_POSTCOPY_LISTEN``), the back-end + replies with the bases of the memory mapped region to the front-end. + For further details on postcopy, see ``VHOST_USER_SET_MEM_TABLE``. + They apply to ``VHOST_USER_ADD_MEM_REG`` accordingly. Exactly one file descriptor from which the memory is mapped is passed in the ancillary data. - In postcopy mode (see ``VHOST_USER_POSTCOPY_LISTEN``), the slave - replies with the bases of the memory mapped region to the master. + In postcopy mode (see ``VHOST_USER_POSTCOPY_LISTEN``), the back-end + replies with the bases of the memory mapped region to the front-end. For further details on postcopy, see ``VHOST_USER_SET_MEM_TABLE``. They apply to ``VHOST_USER_ADD_MEM_REG`` accordingly. ``VHOST_USER_REM_MEM_REG`` :id: 38 :equivalent ioctl: N/A - :slave payload: single memory region description + :request payload: N/A + :reply payload: single memory region description When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol feature has been successfully negotiated, this message is submitted - by the master to the slave. The message payload contains a memory + by the front-end to the back-end. The message payload contains a memory region descriptor struct, describing a region of guest memory which - the slave device must unmap. When the + the back-end device must unmap. When the ``VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS`` protocol feature has been successfully negotiated, along with the ``VHOST_USER_ADD_MEM_REG`` message, this message is used to set and - update the memory tables of the slave device. + update the memory tables of the back-end device. The memory region to be removed is identified by its guest address, user address and size. The mmap offset is ignored. No file descriptors SHOULD be passed in the ancillary data. For - compatibility with existing incorrect implementations, the slave MAY + compatibility with existing incorrect implementations, the back-end MAY accept messages with one file descriptor. If a file descriptor is - passed, the slave MUST close it without using it otherwise. + passed, the back-end MUST close it without using it otherwise. + + The memory region to be removed is identified by its guest address, + user address and size. The mmap offset is ignored. + + No file descriptors SHOULD be passed in the ancillary data. For + compatibility with existing incorrect implementations, the back-end MAY + accept messages with one file descriptor. If a file descriptor is + passed, the back-end MUST close it without using it otherwise. ``VHOST_USER_SET_STATUS`` :id: 39 :equivalent ioctl: VHOST_VDPA_SET_STATUS - :slave payload: N/A - :master payload: ``u64`` + :request payload: ``u64`` + :reply payload: N/A When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been - successfully negotiated, this message is submitted by the master to - notify the backend with updated device status as defined in the Virtio + successfully negotiated, this message is submitted by the front-end to + notify the back-end with updated device status as defined in the Virtio specification. ``VHOST_USER_GET_STATUS`` :id: 40 :equivalent ioctl: VHOST_VDPA_GET_STATUS - :slave payload: ``u64`` - :master payload: N/A + :request payload: N/A + :reply payload: ``u64`` When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been - successfully negotiated, this message is submitted by the master to - query the backend for its device status as defined in the Virtio + successfully negotiated, this message is submitted by the front-end to + query the back-end for its device status as defined in the Virtio specification. -Slave message types -------------------- +Back-end message types +---------------------- + +For this type of message, the request is sent by the back-end and the reply +is sent by the front-end. ``VHOST_USER_SLAVE_IOTLB_MSG`` :id: 1 :equivalent ioctl: N/A (equivalent to ``VHOST_IOTLB_MSG`` message type) - :slave payload: ``struct vhost_iotlb_msg`` - :master payload: N/A + :request payload: ``struct vhost_iotlb_msg`` + :reply payload: N/A Send IOTLB messages with ``struct vhost_iotlb_msg`` as payload. - Slave sends such requests to notify of an IOTLB miss, or an IOTLB + The back-end sends such requests to notify of an IOTLB miss, or an IOTLB access failure. If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is - negotiated, and slave set the ``VHOST_USER_NEED_REPLY`` flag, master + negotiated, and back-end set the ``VHOST_USER_NEED_REPLY`` flag, the front-end must respond with zero when operation is successfully completed, or non-zero otherwise. This request should be send only when ``VIRTIO_F_IOMMU_PLATFORM`` feature has been successfully @@ -1410,23 +1463,23 @@ Slave message types ``VHOST_USER_SLAVE_CONFIG_CHANGE_MSG`` :id: 2 :equivalent ioctl: N/A - :slave payload: N/A - :master payload: N/A + :request payload: N/A + :reply payload: N/A When ``VHOST_USER_PROTOCOL_F_CONFIG`` is negotiated, vhost-user - slave sends such messages to notify that the virtio device's + back-end sends such messages to notify that the virtio device's configuration space has changed, for those host devices which can support such feature, host driver can send ``VHOST_USER_GET_CONFIG`` - message to slave to get the latest content. If - ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, and slave set the - ``VHOST_USER_NEED_REPLY`` flag, master must respond with zero when + message to the back-end to get the latest content. If + ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, and the back-end sets the + ``VHOST_USER_NEED_REPLY`` flag, the front-end must respond with zero when operation is successfully completed, or non-zero otherwise. ``VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG`` :id: 3 :equivalent ioctl: N/A - :slave payload: vring area description - :master payload: N/A + :request payload: vring area description + :reply payload: N/A Sets host notifier for a specified queue. The queue index is contained in the ``u64`` field of the vring area description. The @@ -1437,7 +1490,7 @@ Slave message types description. QEMU can mmap the file descriptor based on the size and offset to get a memory range. Registering a host notifier means mapping this memory range to the VM as the specified queue's notify - MMIO region. Slave sends this request to tell QEMU to de-register + MMIO region. The back-end sends this request to tell QEMU to de-register the existing notifier if any and register the new notifier if the request is sent with a file descriptor. @@ -1448,28 +1501,28 @@ Slave message types ``VHOST_USER_SLAVE_VRING_CALL`` :id: 4 :equivalent ioctl: N/A - :slave payload: vring state description - :master payload: N/A + :request payload: vring state description + :reply payload: N/A When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol feature has been successfully negotiated, this message may be - submitted by the slave to indicate that a buffer was used from + submitted by the back-end to indicate that a buffer was used from the vring instead of signalling this using the vring's call file - descriptor or having the master relying on polling. + descriptor or having the front-end relying on polling. The state.num field is currently reserved and must be set to 0. ``VHOST_USER_SLAVE_VRING_ERR`` :id: 5 :equivalent ioctl: N/A - :slave payload: vring state description - :master payload: N/A + :request payload: vring state description + :reply payload: N/A When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol feature has been successfully negotiated, this message may be - submitted by the slave to indicate that an error occurred on the + submitted by the back-end to indicate that an error occurred on the specific vring, instead of signalling the error file descriptor - set by the master via ``VHOST_USER_SET_VRING_ERR``. + set by the front-end via ``VHOST_USER_SET_VRING_ERR``. The state.num field is currently reserved and must be set to 0. @@ -1480,21 +1533,21 @@ VHOST_USER_PROTOCOL_F_REPLY_ACK The original vhost-user specification only demands replies for certain commands. This differs from the vhost protocol implementation where -commands are sent over an ``ioctl()`` call and block until the client +commands are sent over an ``ioctl()`` call and block until the back-end has completed. With this protocol extension negotiated, the sender (QEMU) can set the ``need_reply`` [Bit 3] flag to any command. This indicates that the -client MUST respond with a Payload ``VhostUserMsg`` indicating success +back-end MUST respond with a Payload ``VhostUserMsg`` indicating success or failure. The payload should be set to zero on success or non-zero on failure, unless the message already has an explicit reply body. -The response payload gives QEMU a deterministic indication of the result +The reply payload gives QEMU a deterministic indication of the result of the command. Today, QEMU is expected to terminate the main vhost-user loop upon receiving such errors. In future, qemu could be taught to be more resilient for selective requests. -For the message types that already solicit a reply from the client, +For the message types that already solicit a reply from the back-end, the presence of ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` or need_reply bit being set brings no behavioural change. (See the Communication_ section for details.) @@ -1504,26 +1557,26 @@ section for details.) Backend program conventions =========================== -vhost-user backends can provide various devices & services and may +vhost-user back-ends can provide various devices & services and may need to be configured manually depending on the use case. However, it is a good idea to follow the conventions listed here when possible. Users, QEMU or libvirt, can then rely on some common behaviour to avoid heterogeneous configuration and management of the -backend programs and facilitate interoperability. +back-end programs and facilitate interoperability. -Each backend installed on a host system should come with at least one +Each back-end installed on a host system should come with at least one JSON file that conforms to the vhost-user.json schema. Each file -informs the management applications about the backend type, and binary +informs the management applications about the back-end type, and binary location. In addition, it defines rules for management apps for -picking the highest priority backend when multiple match the search +picking the highest priority back-end when multiple match the search criteria (see ``@VhostUserBackend`` documentation in the schema file). -If the backend is not capable of enabling a requested feature on the +If the back-end is not capable of enabling a requested feature on the host (such as 3D acceleration with virgl), or the initialization -failed, the backend should fail to start early and exit with a status +failed, the back-end should fail to start early and exit with a status != 0. It may also print a message to stderr for further details. -The backend program must not daemonize itself, but it may be +The back-end program must not daemonize itself, but it may be daemonized by the management layer. It may also have a restricted access to the system. @@ -1531,7 +1584,7 @@ File descriptors 0, 1 and 2 will exist, and have regular stdin/stdout/stderr usage (they may have been redirected to /dev/null by the management layer, or to a log handler). -The backend program must end (as quickly and cleanly as possible) when +The back-end program must end (as quickly and cleanly as possible) when the SIGTERM signal is received. Eventually, it may receive SIGKILL by the management layer after a few seconds. @@ -1545,15 +1598,15 @@ are mandatory, unless explicitly said differently: --fd=FDNUM - When this argument is given, the backend program is started with the + When this argument is given, the back-end program is started with the vhost-user socket as file descriptor FDNUM. It is incompatible with --socket-path. --print-capabilities - Output to stdout the backend capabilities in JSON format, and then + Output to stdout the back-end capabilities in JSON format, and then exit successfully. Other options and arguments should be ignored, and - the backend program should not perform its normal function. The + the back-end program should not perform its normal function. The capabilities can be reported dynamically depending on the host capabilities. diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst index ae8dd233e8..3b729b920d 100644 --- a/docs/system/device-emulation.rst +++ b/docs/system/device-emulation.rst @@ -84,6 +84,7 @@ Emulated Devices devices/can.rst devices/ccid.rst + devices/cxl.rst devices/ivshmem.rst devices/net.rst devices/nvme.rst diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst new file mode 100644 index 0000000000..9293cbf01a --- /dev/null +++ b/docs/system/devices/cxl.rst @@ -0,0 +1,302 @@ +Compute Express Link (CXL) +========================== +From the view of a single host, CXL is an interconnect standard that +targets accelerators and memory devices attached to a CXL host. +This description will focus on those aspects visible either to +software running on a QEMU emulated host or to the internals of +functional emulation. As such, it will skip over many of the +electrical and protocol elements that would be more of interest +for real hardware and will dominate more general introductions to CXL. +It will also completely ignore the fabric management aspects of CXL +by considering only a single host and a static configuration. + +CXL shares many concepts and much of the infrastructure of PCI Express, +with CXL Host Bridges, which have CXL Root Ports which may be directly +attached to CXL or PCI End Points. Alternatively there may be CXL Switches +with CXL and PCI Endpoints attached below them. In many cases additional +control and capabilities are exposed via PCI Express interfaces. +This sharing of interfaces and hence emulation code is is reflected +in how the devices are emulated in QEMU. In most cases the various +CXL elements are built upon an equivalent PCIe devices. + +CXL devices support the following interfaces: + +* Most conventional PCIe interfaces + + - Configuration space access + - BAR mapped memory accesses used for registers and mailboxes. + - MSI/MSI-X + - AER + - DOE mailboxes + - IDE + - Many other PCI express defined interfaces.. + +* Memory operations + + - Equivalent of accessing DRAM / NVDIMMs. Any access / feature + supported by the host for normal memory should also work for + CXL attached memory devices. + +* Cache operations. The are mostly irrelevant to QEMU emulation as + QEMU is not emulating a coherency protocol. Any emulation related + to these will be device specific and is out of the scope of this + document. + +CXL 2.0 Device Types +-------------------- +CXL 2.0 End Points are often categorized into three types. + +**Type 1:** These support coherent caching of host memory. Example might +be a crypto accelerators. May also have device private memory accessible +via means such as PCI memory reads and writes to BARs. + +**Type 2:** These support coherent caching of host memory and host +managed device memory (HDM) for which the coherency protocol is managed +by the host. This is a complex topic, so for more information on CXL +coherency see the CXL 2.0 specification. + +**Type 3 Memory devices:** These devices act as a means of attaching +additional memory (HDM) to a CXL host including both volatile and +persistent memory. The CXL topology may support interleaving across a +number of Type 3 memory devices using HDM Decoders in the host, host +bridge, switch upstream port and endpoints. + +Scope of CXL emulation in QEMU +------------------------------ +The focus of CXL emulation is CXL revision 2.0 and later. Earlier CXL +revisions defined a smaller set of features, leaving much of the control +interface as implementation defined or device specific, making generic +emulation challenging with host specific firmware being responsible +for setup and the Endpoints being presented to operating systems +as Root Complex Integrated End Points. CXL rev 2.0 looks a lot +more like PCI Express, with fully specified discoverability +of the CXL topology. + +CXL System components +---------------------- +A CXL system is made up a Host with a number of 'standard components' +the control and capabilities of which are discoverable by system software +using means described in the CXL 2.0 specification. + +CXL Fixed Memory Windows (CFMW) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A CFMW consists of a particular range of Host Physical Address space +which is routed to particular CXL Host Bridges. At time of generic +software initialization it will have a particularly interleaving +configuration and associated Quality of Serice Throtling Group (QTG). +This information is available to system software, when making +decisions about how to configure interleave across available CXL +memory devices. It is provide as CFMW Structures (CFMWS) in +the CXL Early Discovery Table, an ACPI table. + +Note: QTG 0 is the only one currently supported in QEMU. + +CXL Host Bridge (CXL HB) +~~~~~~~~~~~~~~~~~~~~~~~~ +A CXL host bridge is similar to the PCIe equivalent, but with a +specification defined register interface called CXL Host Bridge +Component Registers (CHBCR). The location of this CHBCR MMIO +space is described to system software via a CXL Host Bridge +Structure (CHBS) in the CEDT ACPI table. The actual interfaces +are identical to those used for other parts of the CXL heirarchy +as CXL Component Registers in PCI BARs. + +Interfaces provided include: + +* Configuration of HDM Decoders to route CXL Memory accesses with + a particularly Host Physical Address range to the target port + below which the CXL device servicing that address lies. This + may be a mapping to a single Root Port (RP) or across a set of + target RPs. + +CXL Root Ports (CXL RP) +~~~~~~~~~~~~~~~~~~~~~~~ +A CXL Root Port servers te same purpose as a PCIe Root Port. +There are a number of CXL specific Designated Vendor Specific +Extended Capabilities (DVSEC) in PCIe Configuration Space +and associated component register access via PCI bars. + +CXL Switch +~~~~~~~~~~ +Not yet implemented in QEMU. + +Here we consider a simple CXL switch with only a single +virtual hierarchy. Whilst more complex devices exist, their +visibility to a particular host is generally the same as for +a simple switch design. Hosts often have no awareness +of complex rerouting and device pooling, they simply see +devices being hot added or hot removed. + +A CXL switch has a similar architecture to those in PCIe, +with a single upstream port, internal PCI bus and multiple +downstream ports. + +Both the CXL upstream and downstream ports have CXL specific +DVSECs in configuration space, and component registers in PCI +BARs. The Upstream Port has the configuration interfaces for +the HDM decoders which route incoming memory accesses to the +appropriate downstream port. + +CXL Memory Devices - Type 3 +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +CXL type 3 devices use a PCI class code and are intended to be supported +by a generic operating system driver. They have HDM decoders +though in these EP devices, the decoder is reponsible not for +routing but for translation of the incoming host physical address (HPA) +into a Device Physical Address (DPA). + +CXL Memory Interleave +--------------------- +To understand the interaction of different CXL hardware components which +are emulated in QEMU, let us consider a memory read in a fully configured +CXL topology. Note that system software is responsible for configuration +of all components with the exception of the CFMWs. System software is +responsible for allocating appropriate ranges from within the CFMWs +and exposing those via normal memory configurations as would be done +for system RAM. + +Example system Topology. x marks the match in each decoder level:: + + |<------------------SYSTEM PHYSICAL ADDRESS MAP (1)----------------->| + | __________ __________________________________ __________ | + | | | | | | | | + | | CFMW 0 | | CXL Fixed Memory Window 1 | | CFMW 1 | | + | | HB0 only | | Configured to interleave memory | | HB1 only | | + | | | | memory accesses across HB0/HB1 | | | | + | |__________| |_____x____________________________| |__________| | + | | | | + | | | | + | | | | + | Interleave Decoder | | + | Matches this HB | | + \_____________| |_____________/ + __________|__________ _____|_______________ + | | | | + (2) | CXL HB 0 | | CXL HB 1 | + | HB IntLv Decoders | | HB IntLv Decoders | + | PCI/CXL Root Bus 0c | | PCI/CXL Root Bus 0d | + | | | | + |___x_________________| |_____________________| + | | | | + | | | | + A HB 0 HDM Decoder | | | + matches this Port | | | + | | | | + ___________|___ __________|__ __|_________ ___|_________ + (3)| Root Port 0 | | Root Port 1 | | Root Port 2| | Root Port 3 | + | Appears in | | Appears in | | Appears in | | Appear in | + | PCI topology | | PCI Topology| | PCI Topo | | PCI Topo | + | As 0c:00.0 | | as 0c:01.0 | | as de:00.0 | | as de:01.0 | + |_______________| |_____________| |____________| |_____________| + | | | | + | | | | + _____|_________ ______|______ ______|_____ ______|_______ + (4)| x | | | | | | | + | CXL Type3 0 | | CXL Type3 1 | | CXL type3 2| | CLX Type 3 3 | + | | | | | | | | + | PMEM0(Vol LSA)| | PMEM1 (...) | | PMEM2 (...)| | PMEM3 (...) | + | Decoder to go | | | | | | | + | from host PA | | PCI 0e:00.0 | | PCI df:00.0| | PCI e0:00.0 | + | to device PA | | | | | | | + | PCI as 0d:00.0| | | | | | | + |_______________| |_____________| |____________| |______________| + +Notes: + +(1) **3 CXL Fixed Memory Windows (CFMW)** corresponding to different + ranges of the system physical address map. Each CFMW has + particular interleave setup across the CXL Host Bridges (HB) + CFMW0 provides uninterleaved access to HB0, CFW2 provides + uninterleaved acess to HB1. CFW1 provides interleaved memory access + across HB0 and HB1. + +(2) **Two CXL Host Bridges**. Each of these has 2 CXL Root Ports and + programmable HDM decoders to route memory accesses either to + a single port or interleave them across multiple ports. + A complex configuration here, might be to use the following HDM + decoders in HB0. HDM0 routes CFMW0 requests to RP0 and hence + part of CXL Type3 0. HDM1 routes CFMW0 requests from a + different region of the CFMW0 PA range to RP2 and hence part + of CXL Type 3 1. HDM2 routes yet another PA range from within + CFMW0 to be interleaved across RP0 and RP1, providing 2 way + interleave of part of the memory provided by CXL Type3 0 and + CXL Type 3 1. HDM3 routes those interleaved accesses from + CFMW1 that target HB0 to RP 0 and another part of the memory of + CXL Type 3 0 (as part of a 2 way interleave at the system level + across for example CXL Type3 0 and CXL Type3 2. + HDM4 is used to enable system wide 4 way interleave across all + the present CXL type3 devices, by interleaving those (interleaved) + requests that HB0 receives from from CFMW1 across RP 0 and + RP 1 and hence to yet more regions of the memory of the + attached Type3 devices. Note this is a representative subset + of the full range of possible HDM decoder configurations in this + topology. + +(3) **Four CXL Root Ports.** In this case the CXL Type 3 devices are + directly attached to these ports. + +(4) **Four CXL Type3 memory expansion devices.** These will each have + HDM decoders, but in this case rather than performing interleave + they will take the Host Physical Addresses of accesses and map + them to their own local Device Physical Address Space (DPA). + +Example command lines +--------------------- +A very simple setup with just one directly attached CXL Type 3 device:: + + qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \ + ... + -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest.raw,size=256M \ + -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \ + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ + -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ + -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ + -cxl-fixed-memory-window targets.0=cxl.1,size=4G + +A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way +interleave across 2 CXL host bridges. Each host bridge has 2 CXL Root Ports, with +the CXL Type3 device directly attached (no switches).:: + + qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \ + ... + -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest.raw,size=256M \ + -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M \ + -object memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M \ + -object memory-backend-file,id=cxl-mem4,share=on,mem-path=/tmp/cxltest4.raw,size=256M \ + -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \ + -object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M \ + -object memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M \ + -object memory-backend-file,id=cxl-lsa4,share=on,mem-path=/tmp/lsa4.raw,size=256M \ + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ + -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2 \ + -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ + -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ + -device cxl-rp,port=1,bus=cxl.1,id=root_port14,chassis=0,slot=3 \ + -device cxl-type3,bus=root_port14,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \ + -device cxl-rp,port=0,bus=cxl.2,id=root_port15,chassis=0,slot=5 \ + -device cxl-type3,bus=root_port15,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \ + -device cxl-rp,port=1,bus=cxl.2,id=root_port16,chassis=0,slot=6 \ + -device cxl-type3,bus=root_port16,memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \ + -cxl-fixed-memory-window targets.0=cxl.1,targets.1=cxl.2,size=4G,interleave-granularity=8k + +Kernel Configuration Options +---------------------------- + +In Linux 5.18 the followings options are necessary to make use of +OS management of CXL memory devices as described here. + +* CONFIG_CXL_BUS +* CONFIG_CXL_PCI +* CONFIG_CXL_ACPI +* CONFIG_CXL_PMEM +* CONFIG_CXL_MEM +* CONFIG_CXL_PORT +* CONFIG_CXL_REGION + +References +---------- + + - Consortium website for specifications etc: + http://www.computeexpresslink.org + - Compute Express link Revision 2 specification, October 2020 + - CEDT CFMWS & QTG _DSM ECN May 2021 diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst index 4c950f6199..8e08a29e89 100644 --- a/docs/tools/qemu-nbd.rst +++ b/docs/tools/qemu-nbd.rst @@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified. .. option:: -e, --shared=NUM Allow up to *NUM* clients to share the device (default - ``1``), 0 for unlimited. Safe for readers, but for now, - consistency is not guaranteed between multiple writers. + ``1``), 0 for unlimited. .. option:: -t, --persistent diff --git a/hmp-commands.hx b/hmp-commands.hx index 03e6a73d1f..564f1de364 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1269,7 +1269,11 @@ ERST { .name = "netdev_add", .args_type = "netdev:O", - .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]", + .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user" +#ifdef CONFIG_VMNET + "|vmnet-host|vmnet-shared|vmnet-bridged" +#endif + "],id=str[,prop=value][,...]", .help = "add host network device", .cmd = hmp_netdev_add, .command_completion = netdev_add_completion, diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 54ee93b71f..5f522e68e9 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -216,7 +216,7 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp) } v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag); - virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size); + virtio_init(vdev, VIRTIO_ID_9P, v->config_size); v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output); } diff --git a/hw/Kconfig b/hw/Kconfig index ad20cce0a9..50e0952889 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -6,6 +6,7 @@ source audio/Kconfig source block/Kconfig source char/Kconfig source core/Kconfig +source cxl/Kconfig source display/Kconfig source dma/Kconfig source gpio/Kconfig diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index 19caebde6c..3703aca212 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -5,6 +5,7 @@ config ACPI_X86 bool select ACPI select ACPI_NVDIMM + select ACPI_CXL select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG select ACPI_HMAT @@ -66,3 +67,7 @@ config ACPI_ERST bool default y depends on ACPI && PCI + +config ACPI_CXL + bool + depends on ACPI diff --git a/hw/acpi/cxl-stub.c b/hw/acpi/cxl-stub.c new file mode 100644 index 0000000000..15bc21076b --- /dev/null +++ b/hw/acpi/cxl-stub.c @@ -0,0 +1,12 @@ + +/* + * Stubs for ACPI platforms that don't support CXl + */ +#include "qemu/osdep.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/cxl.h" + +void build_cxl_osc_method(Aml *dev) +{ + g_assert_not_reached(); +} diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c new file mode 100644 index 0000000000..31d5235136 --- /dev/null +++ b/hw/acpi/cxl.c @@ -0,0 +1,257 @@ +/* + * CXL ACPI Implementation + * + * Copyright(C) 2020 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "hw/cxl/cxl.h" +#include "hw/mem/memory-device.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/bios-linker-loader.h" +#include "hw/acpi/cxl.h" +#include "qapi/error.h" +#include "qemu/uuid.h" + +static void cedt_build_chbs(GArray *table_data, PXBDev *cxl) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl.cxl_host_bridge); + struct MemoryRegion *mr = sbd->mmio[0].memory; + + /* Type */ + build_append_int_noprefix(table_data, 0, 1); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + + /* Record Length */ + build_append_int_noprefix(table_data, 32, 2); + + /* UID - currently equal to bus number */ + build_append_int_noprefix(table_data, cxl->bus_nr, 4); + + /* Version */ + build_append_int_noprefix(table_data, 1, 4); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + + /* Base - subregion within a container that is in PA space */ + build_append_int_noprefix(table_data, mr->container->addr + mr->addr, 8); + + /* Length */ + build_append_int_noprefix(table_data, memory_region_size(mr), 8); +} + +/* + * CFMWS entries in CXL 2.0 ECN: CEDT CFMWS & QTG _DSM. + * Interleave ways encoding in CXL 2.0 ECN: 3, 6, 12 and 16-way memory + * interleaving. + */ +static void cedt_build_cfmws(GArray *table_data, MachineState *ms) +{ + CXLState *cxls = ms->cxl_devices_state; + GList *it; + + for (it = cxls->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + int i; + + /* Type */ + build_append_int_noprefix(table_data, 1, 1); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + + /* Record Length */ + build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + + /* Base HPA */ + build_append_int_noprefix(table_data, fw->mr.addr, 8); + + /* Window Size */ + build_append_int_noprefix(table_data, fw->size, 8); + + /* Host Bridge Interleave Ways */ + build_append_int_noprefix(table_data, fw->enc_int_ways, 1); + + /* Host Bridge Interleave Arithmetic */ + build_append_int_noprefix(table_data, 0, 1); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + + /* Host Bridge Interleave Granularity */ + build_append_int_noprefix(table_data, fw->enc_int_gran, 4); + + /* Window Restrictions */ + build_append_int_noprefix(table_data, 0x0f, 2); /* No restrictions */ + + /* QTG ID */ + build_append_int_noprefix(table_data, 0, 2); + + /* Host Bridge List (list of UIDs - currently bus_nr) */ + for (i = 0; i < fw->num_targets; i++) { + g_assert(fw->target_hbs[i]); + build_append_int_noprefix(table_data, fw->target_hbs[i]->bus_nr, 4); + } + } +} + +static int cxl_foreach_pxb_hb(Object *obj, void *opaque) +{ + Aml *cedt = opaque; + + if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEVICE)) { + cedt_build_chbs(cedt->buf, PXB_CXL_DEV(obj)); + } + + return 0; +} + +void cxl_build_cedt(MachineState *ms, GArray *table_offsets, GArray *table_data, + BIOSLinker *linker, const char *oem_id, + const char *oem_table_id) +{ + Aml *cedt; + AcpiTable table = { .sig = "CEDT", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; + + acpi_add_table(table_offsets, table_data); + acpi_table_begin(&table, table_data); + cedt = init_aml_allocator(); + + /* reserve space for CEDT header */ + + object_child_foreach_recursive(object_get_root(), cxl_foreach_pxb_hb, cedt); + cedt_build_cfmws(cedt->buf, ms); + + /* copy AML table into ACPI tables blob and patch header there */ + g_array_append_vals(table_data, cedt->buf->data, cedt->buf->len); + free_aml_allocator(); + + acpi_table_end(linker, &table); +} + +static Aml *__build_cxl_osc_method(void) +{ + Aml *method, *if_uuid, *else_uuid, *if_arg1_not_1, *if_cxl, *if_caps_masked; + Aml *a_ctrl = aml_local(0); + Aml *a_cdw1 = aml_name("CDW1"); + + method = aml_method("_OSC", 4, AML_NOTSERIALIZED); + /* CDW1 is used for the return value so is present whether or not a match occurs */ + aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); + + /* + * Generate shared section between: + * CXL 2.0 - 9.14.2.1.4 and + * PCI Firmware Specification 3.0 + * 4.5.1. _OSC Interface for PCI Host Bridge Devices + * The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is + * identified by the Universal Unique IDentifier (UUID) + * 33DB4D5B-1FF7-401C-9657-7441C03DD766 + * The _OSC interface for a CXL Host bridge is + * identified by the UUID 68F2D50B-C469-4D8A-BD3D-941A103FD3FC + * A CXL Host bridge is compatible with a PCI host bridge so + * for the shared section match both. + */ + if_uuid = aml_if( + aml_lor(aml_equal(aml_arg(0), + aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")), + aml_equal(aml_arg(0), + aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC")))); + aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); + aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); + + aml_append(if_uuid, aml_store(aml_name("CDW3"), a_ctrl)); + + /* + * + * Allows OS control for all 5 features: + * PCIeHotplug SHPCHotplug PME AER PCIeCapability + */ + aml_append(if_uuid, aml_and(a_ctrl, aml_int(0x1F), a_ctrl)); + + /* + * Check _OSC revision. + * PCI Firmware specification 3.3 and CXL 2.0 both use revision 1 + * Unknown Revision is CDW1 - BIT (3) + */ + if_arg1_not_1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1)))); + aml_append(if_arg1_not_1, aml_or(a_cdw1, aml_int(0x08), a_cdw1)); + aml_append(if_uuid, if_arg1_not_1); + + if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl))); + + /* Capability bits were masked */ + aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1)); + aml_append(if_uuid, if_caps_masked); + + aml_append(if_uuid, aml_store(aml_name("CDW2"), aml_name("SUPP"))); + aml_append(if_uuid, aml_store(aml_name("CDW3"), aml_name("CTRL"))); + + /* Update DWORD3 (the return value) */ + aml_append(if_uuid, aml_store(a_ctrl, aml_name("CDW3"))); + + /* CXL only section as per CXL 2.0 - 9.14.2.1.4 */ + if_cxl = aml_if(aml_equal( + aml_arg(0), aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC"))); + /* CXL support field */ + aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(12), "CDW4")); + /* CXL capabilities */ + aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(16), "CDW5")); + aml_append(if_cxl, aml_store(aml_name("CDW4"), aml_name("SUPC"))); + aml_append(if_cxl, aml_store(aml_name("CDW5"), aml_name("CTRC"))); + + /* CXL 2.0 Port/Device Register access */ + aml_append(if_cxl, + aml_or(aml_name("CDW5"), aml_int(0x1), aml_name("CDW5"))); + aml_append(if_uuid, if_cxl); + + aml_append(if_uuid, aml_return(aml_arg(3))); + aml_append(method, if_uuid); + + /* + * If no UUID matched, return Unrecognized UUID via Arg3 DWord 1 + * ACPI 6.4 - 6.2.11 + * Unrecognised UUID - BIT(2) + */ + else_uuid = aml_else(); + + aml_append(else_uuid, + aml_or(aml_name("CDW1"), aml_int(0x4), aml_name("CDW1"))); + aml_append(else_uuid, aml_return(aml_arg(3))); + aml_append(method, else_uuid); + + return method; +} + +void build_cxl_osc_method(Aml *dev) +{ + aml_append(dev, aml_name_decl("SUPP", aml_int(0))); + aml_append(dev, aml_name_decl("CTRL", aml_int(0))); + aml_append(dev, aml_name_decl("SUPC", aml_int(0))); + aml_append(dev, aml_name_decl("CTRC", aml_int(0))); + aml_append(dev, __build_cxl_osc_method()); +} diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index 8bea2e6933..cea2f5f93a 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -13,6 +13,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_false: files('acpi-mem-hotplu acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_true: files('nvdimm.c')) acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) +acpi_ss.add(when: 'CONFIG_ACPI_CXL', if_true: files('cxl.c'), if_false: files('cxl-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) @@ -33,4 +34,5 @@ softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss) softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c', 'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c', 'acpi-mem-hotplug-stub.c', 'acpi-cpu-hotplug-stub.c', - 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c')) + 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c', + 'cxl-stub.c')) diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 97f3b38019..219262a8da 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -29,6 +29,7 @@ config ARM_VIRT select ACPI_APEI select ACPI_VIOT select VIRTIO_MEM_SUPPORTED + select ACPI_CXL config CHEETAH bool diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index 9c1cafae86..692c94ceb4 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -1365,7 +1365,7 @@ static void n8x0_init(MachineState *machine, } if (option_rom[0].name && - (machine->boot_order[0] == 'n' || !machine->kernel_filename)) { + (machine->boot_config.order[0] == 'n' || !machine->kernel_filename)) { uint8_t *nolo_tags = g_new(uint8_t, 0x10000); /* No, wait, better start at the ROM. */ s->mpu->cpu->env.regs[15] = OMAP2_Q2_BASE + 0x400000; diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c index bc77e3d8c9..f38117057b 100644 --- a/hw/audio/intel-hda.c +++ b/hw/audio/intel-hda.c @@ -1311,17 +1311,16 @@ static const TypeInfo hda_codec_device_type_info = { * create intel hda controller with codec attached to it, * so '-soundhw hda' works. */ -static int intel_hda_and_codec_init(PCIBus *bus) +static int intel_hda_and_codec_init(PCIBus *bus, const char *audiodev) { DeviceState *controller; BusState *hdabus; DeviceState *codec; - warn_report("'-soundhw hda' is deprecated, " - "please use '-device intel-hda -device hda-duplex' instead"); controller = DEVICE(pci_create_simple(bus, -1, "intel-hda")); hdabus = QLIST_FIRST(&controller->child_bus); codec = qdev_new("hda-duplex"); + qdev_prop_set_string(codec, "audiodev", audiodev); qdev_realize_and_unref(codec, hdabus, &error_fatal); return 0; } diff --git a/hw/audio/soundhw.c b/hw/audio/soundhw.c index f7d94d7dfa..94d9463e42 100644 --- a/hw/audio/soundhw.c +++ b/hw/audio/soundhw.c @@ -25,7 +25,9 @@ #include "qemu/option.h" #include "qemu/help_option.h" #include "qemu/error-report.h" +#include "qapi/error.h" #include "qom/object.h" +#include "hw/qdev-properties.h" #include "hw/isa/isa.h" #include "hw/pci/pci.h" #include "hw/audio/soundhw.h" @@ -34,16 +36,15 @@ struct soundhw { const char *name; const char *descr; const char *typename; - int enabled; int isa; - int (*init_pci) (PCIBus *bus); + int (*init_pci) (PCIBus *bus, const char *audiodev); }; static struct soundhw soundhw[9]; static int soundhw_count; void pci_register_soundhw(const char *name, const char *descr, - int (*init_pci)(PCIBus *bus)) + int (*init_pci)(PCIBus *bus, const char *audiodev)) { assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); soundhw[soundhw_count].name = name; @@ -64,95 +65,78 @@ void deprecated_register_soundhw(const char *name, const char *descr, soundhw_count++; } -void select_soundhw(const char *optarg) +void show_valid_soundhw(void) { struct soundhw *c; - if (is_help_option(optarg)) { - show_valid_cards: - - if (soundhw_count) { - printf("Valid sound card names (comma separated):\n"); - for (c = soundhw; c->name; ++c) { - printf ("%-11s %s\n", c->name, c->descr); - } - printf("\n-soundhw all will enable all of the above\n"); - } else { - printf("Machine has no user-selectable audio hardware " - "(it may or may not have always-present audio hardware).\n"); - } - exit(!is_help_option(optarg)); + if (soundhw_count) { + printf("Valid sound card names (comma separated):\n"); + for (c = soundhw; c->name; ++c) { + printf ("%-11s %s\n", c->name, c->descr); + } + } else { + printf("Machine has no user-selectable audio hardware " + "(it may or may not have always-present audio hardware).\n"); } - else { - size_t l; - const char *p; - char *e; - int bad_card = 0; +} - if (!strcmp(optarg, "all")) { - for (c = soundhw; c->name; ++c) { - c->enabled = 1; - } - return; - } +static struct soundhw *selected = NULL; +static const char *audiodev_id; - p = optarg; - while (*p) { - e = strchr(p, ','); - l = !e ? strlen(p) : (size_t) (e - p); +void select_soundhw(const char *optarg, const char *audiodev) +{ + struct soundhw *c; - for (c = soundhw; c->name; ++c) { - if (!strncmp(c->name, p, l) && !c->name[l]) { - c->enabled = 1; - break; - } - } + if (selected) { + error_setg(&error_fatal, "only one -soundhw option is allowed"); + } - if (!c->name) { - if (l > 80) { - error_report("Unknown sound card name (too big to show)"); - } - else { - error_report("Unknown sound card name `%.*s'", - (int) l, p); - } - bad_card = 1; - } - p += l + (e != NULL); + for (c = soundhw; c->name; ++c) { + if (g_str_equal(c->name, optarg)) { + selected = c; + audiodev_id = audiodev; + break; } + } - if (bad_card) { - goto show_valid_cards; - } + if (!c->name) { + error_report("Unknown sound card name `%s'", optarg); + show_valid_soundhw(); + exit(1); } } void soundhw_init(void) { - struct soundhw *c; + struct soundhw *c = selected; ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL); PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL); + BusState *bus; - for (c = soundhw; c->name; ++c) { - if (c->enabled) { - if (c->typename) { - warn_report("'-soundhw %s' is deprecated, " - "please use '-device %s' instead", - c->name, c->typename); - if (c->isa) { - isa_create_simple(isa_bus, c->typename); - } else { - pci_create_simple(pci_bus, -1, c->typename); - } - } else { - assert(!c->isa); - if (!pci_bus) { - error_report("PCI bus not available for %s", c->name); - exit(1); - } - c->init_pci(pci_bus); - } + if (!c) { + return; + } + if (c->isa) { + if (!isa_bus) { + error_report("ISA bus not available for %s", c->name); + exit(1); + } + bus = BUS(isa_bus); + } else { + if (!pci_bus) { + error_report("PCI bus not available for %s", c->name); + exit(1); } + bus = BUS(pci_bus); + } + + if (c->typename) { + DeviceState *dev = qdev_new(c->typename); + qdev_prop_set_string(dev, "audiodev", audiodev_id); + qdev_realize_and_unref(dev, bus, &error_fatal); + } else { + assert(!c->isa); + c->init_pci(pci_bus, audiodev_id); } } diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 347875a0cd..57bb355794 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -1530,6 +1530,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) int tmp; fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]); tmp = (fdctrl->fifo[6] - ks + 1); + if (tmp < 0) { + FLOPPY_DPRINTF("invalid EOT: %d\n", tmp); + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + } if (fdctrl->fifo[0] & 0x80) tmp += fdctrl->fifo[6]; fdctrl->data_len *= tmp; diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 1a42ae9187..5dca4eab09 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -491,7 +491,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, + virtio_init(vdev, VIRTIO_ID_BLOCK, sizeof(struct virtio_blk_config)); s->virtqs = g_new(VirtQueue *, s->num_queues); @@ -569,6 +569,12 @@ static void vhost_user_blk_instance_init(Object *obj) "/disk@0,0", DEVICE(obj)); } +static struct vhost_dev *vhost_user_blk_get_vhost(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + return &s->dev; +} + static const VMStateDescription vmstate_vhost_user_blk = { .name = "vhost-user-blk", .minimum_version_id = 1, @@ -603,6 +609,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_user_blk_get_features; vdc->set_status = vhost_user_blk_set_status; vdc->reset = vhost_user_blk_reset; + vdc->get_vhost = vhost_user_blk_get_vhost; } static const TypeInfo vhost_user_blk_info = { diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 540c38f829..cd804795c6 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1206,7 +1206,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) virtio_blk_set_config_size(s, s->host_features); - virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size); + virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); s->blk = conf->conf.blk; s->rq = NULL; @@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) for (i = 0; i < conf->num_queues; i++) { virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); } - qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size - / 2); + qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); if (err != NULL) { error_propagate(errp, err); @@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) for (i = 0; i < conf->num_queues; i++) { virtio_del_queue(vdev, i); } - qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size - / 2); + qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); qemu_del_vm_change_state_handler(s->change); blockdev_mark_auto_del(s->blk); virtio_cleanup(vdev); diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 6048d408b8..7d4601cb5d 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -1044,8 +1044,7 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) VIRTIO_CONSOLE_F_EMERG_WRITE)) { config_size = offsetof(struct virtio_console_config, emerg_wr); } - virtio_init(vdev, "virtio-serial", VIRTIO_ID_CONSOLE, - config_size); + virtio_init(vdev, VIRTIO_ID_CONSOLE, config_size); /* Spawn a new virtio-serial bus on which the ports will ride as devices */ qbus_init(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS, diff --git a/hw/core/machine.c b/hw/core/machine.c index 700c1e76b8..b03d9192ba 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -21,21 +21,25 @@ #include "qapi/qapi-visit-common.h" #include "qapi/qapi-visit-machine.h" #include "qapi/visitor.h" +#include "qom/object_interfaces.h" #include "hw/sysbus.h" #include "sysemu/cpus.h" #include "sysemu/sysemu.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" #include "sysemu/numa.h" +#include "sysemu/xen.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" #include "hw/pci/pci.h" #include "hw/mem/nvdimm.h" +#include "hw/cxl/cxl.h" #include "migration/global_state.h" #include "migration/vmstate.h" #include "exec/confidential-guest-support.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" +#include "qom/object_interfaces.h" GlobalProperty hw_compat_7_0[] = {}; const size_t hw_compat_7_0_len = G_N_ELEMENTS(hw_compat_7_0); @@ -523,6 +527,78 @@ static void machine_set_hmat(Object *obj, bool value, Error **errp) ms->numa_state->hmat_enabled = value; } +static void machine_get_mem(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MachineState *ms = MACHINE(obj); + MemorySizeConfiguration mem = { + .has_size = true, + .size = ms->ram_size, + .has_max_size = !!ms->ram_slots, + .max_size = ms->maxram_size, + .has_slots = !!ms->ram_slots, + .slots = ms->ram_slots, + }; + MemorySizeConfiguration *p_mem = &mem; + + visit_type_MemorySizeConfiguration(v, name, &p_mem, &error_abort); +} + +static void machine_set_mem(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MachineState *ms = MACHINE(obj); + MachineClass *mc = MACHINE_GET_CLASS(obj); + MemorySizeConfiguration *mem; + + ERRP_GUARD(); + + if (!visit_type_MemorySizeConfiguration(v, name, &mem, errp)) { + return; + } + + if (!mem->has_size) { + mem->has_size = true; + mem->size = mc->default_ram_size; + } + mem->size = QEMU_ALIGN_UP(mem->size, 8192); + if (mc->fixup_ram_size) { + mem->size = mc->fixup_ram_size(mem->size); + } + if ((ram_addr_t)mem->size != mem->size) { + error_setg(errp, "ram size too large"); + goto out_free; + } + + if (mem->has_max_size) { + if (mem->max_size < mem->size) { + error_setg(errp, "invalid value of maxmem: " + "maximum memory size (0x%" PRIx64 ") must be at least " + "the initial memory size (0x%" PRIx64 ")", + mem->max_size, mem->size); + goto out_free; + } + if (mem->has_slots && mem->slots && mem->max_size == mem->size) { + error_setg(errp, "invalid value of maxmem: " + "memory slots were specified but maximum memory size " + "(0x%" PRIx64 ") is equal to the initial memory size " + "(0x%" PRIx64 ")", mem->max_size, mem->size); + goto out_free; + } + ms->maxram_size = mem->max_size; + } else { + if (mem->has_slots) { + error_setg(errp, "slots specified but no max-size"); + goto out_free; + } + ms->maxram_size = mem->size; + } + ms->ram_size = mem->size; + ms->ram_slots = mem->has_slots ? mem->slots : 0; +out_free: + qapi_free_MemorySizeConfiguration(mem); +} + static char *machine_get_nvdimm_persistence(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -550,6 +626,20 @@ static void machine_set_nvdimm_persistence(Object *obj, const char *value, nvdimms_state->persistence_string = g_strdup(value); } +static bool machine_get_cxl(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->cxl_devices_state->is_enabled; +} + +static void machine_set_cxl(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->cxl_devices_state->is_enabled = value; +} + void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type) { QAPI_LIST_PREPEND(mc->allowed_dynamic_sysbus_devices, g_strdup(type)); @@ -581,21 +671,6 @@ bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type) return allowed; } -static char *machine_get_memdev(Object *obj, Error **errp) -{ - MachineState *ms = MACHINE(obj); - - return g_strdup(ms->ram_memdev_id); -} - -static void machine_set_memdev(Object *obj, const char *value, Error **errp) -{ - MachineState *ms = MACHINE(obj); - - g_free(ms->ram_memdev_id); - ms->ram_memdev_id = g_strdup(value); -} - HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) { int i; @@ -784,6 +859,65 @@ static void machine_set_smp(Object *obj, Visitor *v, const char *name, machine_parse_smp_config(ms, config, errp); } +static void machine_get_boot(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MachineState *ms = MACHINE(obj); + BootConfiguration *config = &ms->boot_config; + visit_type_BootConfiguration(v, name, &config, &error_abort); +} + +static void machine_free_boot_config(MachineState *ms) +{ + g_free(ms->boot_config.order); + g_free(ms->boot_config.once); + g_free(ms->boot_config.splash); +} + +static void machine_copy_boot_config(MachineState *ms, BootConfiguration *config) +{ + MachineClass *machine_class = MACHINE_GET_CLASS(ms); + + machine_free_boot_config(ms); + ms->boot_config = *config; + if (!config->has_order) { + ms->boot_config.has_order = true; + ms->boot_config.order = g_strdup(machine_class->default_boot_order); + } +} + +static void machine_set_boot(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ERRP_GUARD(); + MachineState *ms = MACHINE(obj); + BootConfiguration *config = NULL; + + if (!visit_type_BootConfiguration(v, name, &config, errp)) { + return; + } + if (config->has_order) { + validate_bootdevices(config->order, errp); + if (*errp) { + goto out_free; + } + } + if (config->has_once) { + validate_bootdevices(config->once, errp); + if (*errp) { + goto out_free; + } + } + + machine_copy_boot_config(ms, config); + /* Strings live in ms->boot_config. */ + free(config); + return; + +out_free: + qapi_free_BootConfiguration(config); +} + static void machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -792,6 +926,8 @@ static void machine_class_init(ObjectClass *oc, void *data) mc->default_ram_size = 128 * MiB; mc->rom_file_has_mr = true; + /* Few machines support CXL, so default to off */ + mc->cxl_supported = false; /* numa node memory size aligned on 8MB by default. * On Linux, each node's border has to be 8MB aligned */ @@ -822,6 +958,12 @@ static void machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "dumpdtb", "Dump current dtb to a file and quit"); + object_class_property_add(oc, "boot", "BootConfiguration", + machine_get_boot, machine_set_boot, + NULL, NULL); + object_class_property_set_description(oc, "boot", + "Boot configuration"); + object_class_property_add(oc, "smp", "SMPConfiguration", machine_get_smp, machine_set_smp, NULL, NULL); @@ -883,11 +1025,18 @@ static void machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "memory-encryption", "Set memory encryption object to use"); - object_class_property_add_str(oc, "memory-backend", - machine_get_memdev, machine_set_memdev); + object_class_property_add_link(oc, "memory-backend", TYPE_MEMORY_BACKEND, + offsetof(MachineState, memdev), object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); object_class_property_set_description(oc, "memory-backend", "Set RAM backend" "Valid value is ID of hostmem based backend"); + + object_class_property_add(oc, "memory", "MemorySizeConfiguration", + machine_get_mem, machine_set_mem, + NULL, NULL); + object_class_property_set_description(oc, "memory", + "Memory size configuration"); } static void machine_class_base_init(ObjectClass *oc, void *data) @@ -918,6 +1067,8 @@ static void machine_initfn(Object *obj) ms->mem_merge = true; ms->enable_graphics = true; ms->kernel_cmdline = g_strdup(""); + ms->ram_size = mc->default_ram_size; + ms->maxram_size = mc->default_ram_size; if (mc->nvdimm_supported) { Object *obj = OBJECT(ms); @@ -937,6 +1088,16 @@ static void machine_initfn(Object *obj) "Valid values are cpu, mem-ctrl"); } + if (mc->cxl_supported) { + Object *obj = OBJECT(ms); + + ms->cxl_devices_state = g_new0(CXLState, 1); + object_property_add_bool(obj, "cxl", machine_get_cxl, machine_set_cxl); + object_property_set_description(obj, "cxl", + "Set on/off to enable/disable " + "CXL instantiation"); + } + if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) { ms->numa_state = g_new0(NumaState, 1); object_property_add_bool(obj, "hmat", @@ -955,12 +1116,15 @@ static void machine_initfn(Object *obj) ms->smp.clusters = 1; ms->smp.cores = 1; ms->smp.threads = 1; + + machine_copy_boot_config(ms, &(BootConfiguration){ 0 }); } static void machine_finalize(Object *obj) { MachineState *ms = MACHINE(obj); + machine_free_boot_config(ms); g_free(ms->kernel_filename); g_free(ms->initrd_filename); g_free(ms->kernel_cmdline); @@ -971,6 +1135,7 @@ static void machine_finalize(Object *obj) g_free(ms->device_memory); g_free(ms->nvdimms_state); g_free(ms->numa_state); + g_free(ms->cxl_devices_state); } bool machine_usb(MachineState *machine) @@ -1122,7 +1287,40 @@ MemoryRegion *machine_consume_memdev(MachineState *machine, return ret; } -void machine_run_board_init(MachineState *machine) +static bool create_default_memdev(MachineState *ms, const char *path, Error **errp) +{ + Object *obj; + MachineClass *mc = MACHINE_GET_CLASS(ms); + bool r = false; + + obj = object_new(path ? TYPE_MEMORY_BACKEND_FILE : TYPE_MEMORY_BACKEND_RAM); + if (path) { + if (!object_property_set_str(obj, "mem-path", path, errp)) { + goto out; + } + } + if (!object_property_set_int(obj, "size", ms->ram_size, errp)) { + goto out; + } + object_property_add_child(object_get_objects_root(), mc->default_ram_id, + obj); + /* Ensure backend's memory region name is equal to mc->default_ram_id */ + if (!object_property_set_bool(obj, "x-use-canonical-path-for-ramblock-id", + false, errp)) { + goto out; + } + if (!user_creatable_complete(USER_CREATABLE(obj), errp)) { + goto out; + } + r = object_property_set_link(OBJECT(ms), "memory-backend", obj, errp); + +out: + object_unref(obj); + return r; +} + + +void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp) { MachineClass *machine_class = MACHINE_GET_CLASS(machine); ObjectClass *oc = object_class_by_name(machine->cpu_type); @@ -1133,11 +1331,26 @@ void machine_run_board_init(MachineState *machine) clock values from the log. */ replay_checkpoint(CHECKPOINT_INIT); - if (machine->ram_memdev_id) { - Object *o; - o = object_resolve_path_type(machine->ram_memdev_id, - TYPE_MEMORY_BACKEND, NULL); - machine->ram = machine_consume_memdev(machine, MEMORY_BACKEND(o)); + if (!xen_enabled()) { + /* On 32-bit hosts, QEMU is limited by virtual address space */ + if (machine->ram_size > (2047 << 20) && HOST_LONG_BITS == 32) { + error_setg(errp, "at most 2047 MB RAM can be simulated"); + return; + } + } + + if (machine->memdev) { + ram_addr_t backend_size = object_property_get_uint(OBJECT(machine->memdev), + "size", &error_abort); + if (backend_size != machine->ram_size) { + error_setg(errp, "Machine memory size does not match the size of the memory backend"); + return; + } + } else if (machine_class->default_ram_id && machine->ram_size && + numa_uses_legacy_mem()) { + if (!create_default_memdev(current_machine, mem_path, errp)) { + return; + } } if (machine->numa_state) { @@ -1147,6 +1360,10 @@ void machine_run_board_init(MachineState *machine) } } + if (!machine->ram && machine->memdev) { + machine->ram = machine_consume_memdev(machine, machine->memdev); + } + /* If the machine supports the valid_cpu_types check and the user * specified a CPU with -cpu check here that the user CPU is supported. */ @@ -1229,9 +1446,9 @@ void qdev_machine_creation_done(void) { cpu_synchronize_all_post_init(); - if (current_machine->boot_once) { - qemu_boot_set(current_machine->boot_once, &error_fatal); - qemu_register_reset(restore_boot_order, g_strdup(current_machine->boot_order)); + if (current_machine->boot_config.has_once) { + qemu_boot_set(current_machine->boot_config.once, &error_fatal); + qemu_register_reset(restore_boot_order, g_strdup(current_machine->boot_config.order)); } /* diff --git a/hw/core/numa.c b/hw/core/numa.c index 1aa05dcf42..26d8e5f616 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -695,7 +695,7 @@ void numa_complete_configuration(MachineState *ms) } if (!numa_uses_legacy_mem() && mc->default_ram_id) { - if (ms->ram_memdev_id) { + if (ms->memdev) { error_report("'-machine memory-backend' and '-numa memdev'" " properties are mutually exclusive"); exit(1); diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index c34aac6ebc..357b8761b5 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -428,6 +428,25 @@ const PropertyInfo qdev_prop_int64 = { .set_default_value = qdev_propinfo_set_default_value_int, }; +static void set_uint64_checkmask(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + Property *prop = opaque; + uint64_t *ptr = object_field_prop_ptr(obj, prop); + + visit_type_uint64(v, name, ptr, errp); + if (*ptr & ~prop->bitmask) { + error_setg(errp, "Property value for '%s' has bits outside mask '0x%" PRIx64 "'", + name, prop->bitmask); + } +} + +const PropertyInfo qdev_prop_uint64_checkmask = { + .name = "uint64", + .get = get_uint64, + .set = set_uint64_checkmask, +}; + /* --- string --- */ static void release_string(Object *obj, const char *name, void *opaque) diff --git a/hw/cxl/Kconfig b/hw/cxl/Kconfig new file mode 100644 index 0000000000..8e67519b16 --- /dev/null +++ b/hw/cxl/Kconfig @@ -0,0 +1,3 @@ +config CXL + bool + default y if PCI_EXPRESS diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c new file mode 100644 index 0000000000..7985c9bfca --- /dev/null +++ b/hw/cxl/cxl-component-utils.c @@ -0,0 +1,396 @@ +/* + * CXL Utility library for components + * + * Copyright(C) 2020 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "hw/pci/pci.h" +#include "hw/cxl/cxl.h" + +static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset, + unsigned size) +{ + CXLComponentState *cxl_cstate = opaque; + ComponentRegisters *cregs = &cxl_cstate->crb; + + if (size == 8) { + qemu_log_mask(LOG_UNIMP, + "CXL 8 byte cache mem registers not implemented\n"); + return 0; + } + + if (cregs->special_ops && cregs->special_ops->read) { + return cregs->special_ops->read(cxl_cstate, offset, size); + } else { + return cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)]; + } +} + +static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset, + uint32_t value) +{ + ComponentRegisters *cregs = &cxl_cstate->crb; + uint32_t *cache_mem = cregs->cache_mem_registers; + bool should_commit = false; + + switch (offset) { + case A_CXL_HDM_DECODER0_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + break; + default: + break; + } + + memory_region_transaction_begin(); + stl_le_p((uint8_t *)cache_mem + offset, value); + if (should_commit) { + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); + } + memory_region_transaction_commit(); +} + +static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + CXLComponentState *cxl_cstate = opaque; + ComponentRegisters *cregs = &cxl_cstate->crb; + uint32_t mask; + + if (size == 8) { + qemu_log_mask(LOG_UNIMP, + "CXL 8 byte cache mem registers not implemented\n"); + return; + } + mask = cregs->cache_mem_regs_write_mask[offset / sizeof(*cregs->cache_mem_regs_write_mask)]; + value &= mask; + /* RO bits should remain constant. Done by reading existing value */ + value |= ~mask & cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)]; + if (cregs->special_ops && cregs->special_ops->write) { + cregs->special_ops->write(cxl_cstate, offset, value, size); + return; + } + + if (offset >= A_CXL_HDM_DECODER_CAPABILITY && + offset <= A_CXL_HDM_DECODER0_TARGET_LIST_HI) { + dumb_hdm_handler(cxl_cstate, offset, value); + } else { + cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)] = value; + } +} + +/* + * 8.2.3 + * The access restrictions specified in Section 8.2.2 also apply to CXL 2.0 + * Component Registers. + * + * 8.2.2 + * • A 32 bit register shall be accessed as a 4 Bytes quantity. Partial + * reads are not permitted. + * • A 64 bit register shall be accessed as a 8 Bytes quantity. Partial + * reads are not permitted. + * + * As of the spec defined today, only 4 byte registers exist. + */ +static const MemoryRegionOps cache_mem_ops = { + .read = cxl_cache_mem_read_reg, + .write = cxl_cache_mem_write_reg, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +void cxl_component_register_block_init(Object *obj, + CXLComponentState *cxl_cstate, + const char *type) +{ + ComponentRegisters *cregs = &cxl_cstate->crb; + + memory_region_init(&cregs->component_registers, obj, type, + CXL2_COMPONENT_BLOCK_SIZE); + + /* io registers controls link which we don't care about in QEMU */ + memory_region_init_io(&cregs->io, obj, NULL, cregs, ".io", + CXL2_COMPONENT_IO_REGION_SIZE); + memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cregs, + ".cache_mem", CXL2_COMPONENT_CM_REGION_SIZE); + + memory_region_add_subregion(&cregs->component_registers, 0, &cregs->io); + memory_region_add_subregion(&cregs->component_registers, + CXL2_COMPONENT_IO_REGION_SIZE, + &cregs->cache_mem); +} + +static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk) +{ + /* + * Error status is RW1C but given bits are not yet set, it can + * be handled as RO. + */ + reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0; + /* Bits 12-13 and 17-31 reserved in CXL 2.0 */ + reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff; + write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff; + reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff; + write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff; + reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0; + reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f; + write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f; + /* CXL switches and devices must set */ + reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00; +} + +static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk) +{ + int decoder_count = 1; + int i; + + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, + cxl_decoder_count_enc(decoder_count)); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 1); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 1); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 1); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL, + HDM_DECODER_ENABLE, 0); + write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3; + for (i = 0; i < decoder_count; i++) { + write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * 0x20] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * 0x20] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; + } +} + +void cxl_component_register_init_common(uint32_t *reg_state, uint32_t *write_msk, + enum reg_type type) +{ + int caps = 0; + + /* + * In CXL 2.0 the capabilities required for each CXL component are such that, + * with the ordering chosen here, a single number can be used to define + * which capabilities should be provided. + */ + switch (type) { + case CXL2_DOWNSTREAM_PORT: + case CXL2_DEVICE: + /* RAS, Link */ + caps = 2; + break; + case CXL2_UPSTREAM_PORT: + case CXL2_TYPE3_DEVICE: + case CXL2_LOGICAL_DEVICE: + /* + HDM */ + caps = 3; + break; + case CXL2_ROOT_PORT: + /* + Extended Security, + Snoop */ + caps = 5; + break; + default: + abort(); + } + + memset(reg_state, 0, CXL2_COMPONENT_CM_REGION_SIZE); + + /* CXL Capability Header Register */ + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ID, 1); + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, VERSION, 1); + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 1); + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps); + +#define init_cap_reg(reg, id, version) \ + QEMU_BUILD_BUG_ON(CXL_##reg##_REGISTERS_OFFSET == 0); \ + do { \ + int which = R_CXL_##reg##_CAPABILITY_HEADER; \ + reg_state[which] = FIELD_DP32(reg_state[which], \ + CXL_##reg##_CAPABILITY_HEADER, ID, id); \ + reg_state[which] = \ + FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, \ + VERSION, version); \ + reg_state[which] = \ + FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, PTR, \ + CXL_##reg##_REGISTERS_OFFSET); \ + } while (0) + + init_cap_reg(RAS, 2, 2); + ras_init_common(reg_state, write_msk); + + init_cap_reg(LINK, 4, 2); + + if (caps < 3) { + return; + } + + init_cap_reg(HDM, 5, 1); + hdm_init_common(reg_state, write_msk); + + if (caps < 5) { + return; + } + + init_cap_reg(EXTSEC, 6, 1); + init_cap_reg(SNOOP, 8, 1); + +#undef init_cap_reg +} + +/* + * Helper to creates a DVSEC header for a CXL entity. The caller is responsible + * for tracking the valid offset. + * + * This function will build the DVSEC header on behalf of the caller and then + * copy in the remaining data for the vendor specific bits. + * It will also set up appropriate write masks. + */ +void cxl_component_create_dvsec(CXLComponentState *cxl, + enum reg_type cxl_dev_type, uint16_t length, + uint16_t type, uint8_t rev, uint8_t *body) +{ + PCIDevice *pdev = cxl->pdev; + uint16_t offset = cxl->dvsec_offset; + uint8_t *wmask = pdev->wmask; + + assert(offset >= PCI_CFG_SPACE_SIZE && + ((offset + length) < PCI_CFG_SPACE_EXP_SIZE)); + assert((length & 0xf000) == 0); + assert((rev & ~0xf) == 0); + + /* Create the DVSEC in the MCFG space */ + pcie_add_capability(pdev, PCI_EXT_CAP_ID_DVSEC, 1, offset, length); + pci_set_long(pdev->config + offset + PCIE_DVSEC_HEADER1_OFFSET, + (length << 20) | (rev << 16) | CXL_VENDOR_ID); + pci_set_word(pdev->config + offset + PCIE_DVSEC_ID_OFFSET, type); + memcpy(pdev->config + offset + sizeof(DVSECHeader), + body + sizeof(DVSECHeader), + length - sizeof(DVSECHeader)); + + /* Configure write masks */ + switch (type) { + case PCIE_CXL_DEVICE_DVSEC: + /* Cntrl RW Lock - so needs explicit blocking when lock is set */ + wmask[offset + offsetof(CXLDVSECDevice, ctrl)] = 0xFD; + wmask[offset + offsetof(CXLDVSECDevice, ctrl) + 1] = 0x4F; + /* Status is RW1CS */ + wmask[offset + offsetof(CXLDVSECDevice, ctrl2)] = 0x0F; + /* Lock is RW Once */ + wmask[offset + offsetof(CXLDVSECDevice, lock)] = 0x01; + /* range1/2_base_high/low is RW Lock */ + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi)] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 3] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_lo) + 3] = 0xF0; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi)] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 3] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_lo) + 3] = 0xF0; + break; + case NON_CXL_FUNCTION_MAP_DVSEC: + break; /* Not yet implemented */ + case EXTENSIONS_PORT_DVSEC: + wmask[offset + offsetof(CXLDVSECPortExtensions, control)] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortExtensions, control) + 1] = 0x40; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_base)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_limit)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 3] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 3] = 0xFF; + break; + case GPF_PORT_DVSEC: + wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl)] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl) + 1] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl)] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl) + 1] = 0x0F; + break; + case GPF_DEVICE_DVSEC: + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration)] = 0x0F; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration) + 1] = 0x0F; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power)] = 0xFF; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 3] = 0xFF; + break; + case PCIE_FLEXBUS_PORT_DVSEC: + switch (cxl_dev_type) { + case CXL2_ROOT_PORT: + /* No MLD */ + wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xbd; + break; + case CXL2_DOWNSTREAM_PORT: + wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xfd; + break; + default: /* Registers are RO for other component types */ + break; + } + /* There are rw1cs bits in the status register but never set currently */ + break; + } + + /* Update state for future DVSEC additions */ + range_init_nofail(&cxl->dvsecs[type], cxl->dvsec_offset, length); + cxl->dvsec_offset += length; +} + +uint8_t cxl_interleave_ways_enc(int iw, Error **errp) +{ + switch (iw) { + case 1: return 0x0; + case 2: return 0x1; + case 4: return 0x2; + case 8: return 0x3; + case 16: return 0x4; + case 3: return 0x8; + case 6: return 0x9; + case 12: return 0xa; + default: + error_setg(errp, "Interleave ways: %d not supported", iw); + return 0; + } +} + +uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp) +{ + switch (gran) { + case 256: return 0; + case 512: return 1; + case 1024: return 2; + case 2048: return 3; + case 4096: return 4; + case 8192: return 5; + case 16384: return 6; + default: + error_setg(errp, "Interleave granularity: %" PRIu64 " invalid", gran); + return 0; + } +} diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c new file mode 100644 index 0000000000..687759b301 --- /dev/null +++ b/hw/cxl/cxl-device-utils.c @@ -0,0 +1,265 @@ +/* + * CXL Utility library for devices + * + * Copyright(C) 2020 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/cxl/cxl.h" + +/* + * Device registers have no restrictions per the spec, and so fall back to the + * default memory mapped register rules in 8.2: + * Software shall use CXL.io Memory Read and Write to access memory mapped + * register defined in this section. Unless otherwise specified, software + * shall restrict the accesses width based on the following: + * • A 32 bit register shall be accessed as a 1 Byte, 2 Bytes or 4 Bytes + * quantity. + * • A 64 bit register shall be accessed as a 1 Byte, 2 Bytes, 4 Bytes or 8 + * Bytes + * • The address shall be a multiple of the access width, e.g. when + * accessing a register as a 4 Byte quantity, the address shall be + * multiple of 4. + * • The accesses shall map to contiguous bytes.If these rules are not + * followed, the behavior is undefined + */ + +static uint64_t caps_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + CXLDeviceState *cxl_dstate = opaque; + + if (size == 4) { + return cxl_dstate->caps_reg_state32[offset / sizeof(*cxl_dstate->caps_reg_state32)]; + } else { + return cxl_dstate->caps_reg_state64[offset / sizeof(*cxl_dstate->caps_reg_state64)]; + } +} + +static uint64_t dev_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + return 0; +} + +static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + CXLDeviceState *cxl_dstate = opaque; + + switch (size) { + case 1: + return cxl_dstate->mbox_reg_state[offset]; + case 2: + return cxl_dstate->mbox_reg_state16[offset / size]; + case 4: + return cxl_dstate->mbox_reg_state32[offset / size]; + case 8: + return cxl_dstate->mbox_reg_state64[offset / size]; + default: + g_assert_not_reached(); + } +} + +static void mailbox_mem_writel(uint32_t *reg_state, hwaddr offset, + uint64_t value) +{ + switch (offset) { + case A_CXL_DEV_MAILBOX_CTRL: + /* fallthrough */ + case A_CXL_DEV_MAILBOX_CAP: + /* RO register */ + break; + default: + qemu_log_mask(LOG_UNIMP, + "%s Unexpected 32-bit access to 0x%" PRIx64 " (WI)\n", + __func__, offset); + return; + } + + reg_state[offset / sizeof(*reg_state)] = value; +} + +static void mailbox_mem_writeq(uint64_t *reg_state, hwaddr offset, + uint64_t value) +{ + switch (offset) { + case A_CXL_DEV_MAILBOX_CMD: + break; + case A_CXL_DEV_BG_CMD_STS: + /* BG not supported */ + /* fallthrough */ + case A_CXL_DEV_MAILBOX_STS: + /* Read only register, will get updated by the state machine */ + return; + default: + qemu_log_mask(LOG_UNIMP, + "%s Unexpected 64-bit access to 0x%" PRIx64 " (WI)\n", + __func__, offset); + return; + } + + + reg_state[offset / sizeof(*reg_state)] = value; +} + +static void mailbox_reg_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + CXLDeviceState *cxl_dstate = opaque; + + if (offset >= A_CXL_DEV_CMD_PAYLOAD) { + memcpy(cxl_dstate->mbox_reg_state + offset, &value, size); + return; + } + + switch (size) { + case 4: + mailbox_mem_writel(cxl_dstate->mbox_reg_state32, offset, value); + break; + case 8: + mailbox_mem_writeq(cxl_dstate->mbox_reg_state64, offset, value); + break; + default: + g_assert_not_reached(); + } + + if (ARRAY_FIELD_EX32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL, + DOORBELL)) { + cxl_process_mailbox(cxl_dstate); + } +} + +static uint64_t mdev_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + uint64_t retval = 0; + + retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MEDIA_STATUS, 1); + retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MBOX_READY, 1); + + return retval; +} + +static const MemoryRegionOps mdev_ops = { + .read = mdev_reg_read, + .write = NULL, /* memory device register is read only */ + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + }, +}; + +static const MemoryRegionOps mailbox_ops = { + .read = mailbox_reg_read, + .write = mailbox_reg_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +static const MemoryRegionOps dev_ops = { + .read = dev_reg_read, + .write = NULL, /* status register is read only */ + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +static const MemoryRegionOps caps_ops = { + .read = caps_reg_read, + .write = NULL, /* caps registers are read only */ + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +void cxl_device_register_block_init(Object *obj, CXLDeviceState *cxl_dstate) +{ + /* This will be a BAR, so needs to be rounded up to pow2 for PCI spec */ + memory_region_init(&cxl_dstate->device_registers, obj, "device-registers", + pow2ceil(CXL_MMIO_SIZE)); + + memory_region_init_io(&cxl_dstate->caps, obj, &caps_ops, cxl_dstate, + "cap-array", CXL_CAPS_SIZE); + memory_region_init_io(&cxl_dstate->device, obj, &dev_ops, cxl_dstate, + "device-status", CXL_DEVICE_STATUS_REGISTERS_LENGTH); + memory_region_init_io(&cxl_dstate->mailbox, obj, &mailbox_ops, cxl_dstate, + "mailbox", CXL_MAILBOX_REGISTERS_LENGTH); + memory_region_init_io(&cxl_dstate->memory_device, obj, &mdev_ops, + cxl_dstate, "memory device caps", + CXL_MEMORY_DEVICE_REGISTERS_LENGTH); + + memory_region_add_subregion(&cxl_dstate->device_registers, 0, + &cxl_dstate->caps); + memory_region_add_subregion(&cxl_dstate->device_registers, + CXL_DEVICE_STATUS_REGISTERS_OFFSET, + &cxl_dstate->device); + memory_region_add_subregion(&cxl_dstate->device_registers, + CXL_MAILBOX_REGISTERS_OFFSET, + &cxl_dstate->mailbox); + memory_region_add_subregion(&cxl_dstate->device_registers, + CXL_MEMORY_DEVICE_REGISTERS_OFFSET, + &cxl_dstate->memory_device); +} + +static void device_reg_init_common(CXLDeviceState *cxl_dstate) { } + +static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate) +{ + /* 2048 payload size, with no interrupt or background support */ + ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP, + PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT); + cxl_dstate->payload_size = CXL_MAILBOX_MAX_PAYLOAD_SIZE; +} + +static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) { } + +void cxl_device_register_init_common(CXLDeviceState *cxl_dstate) +{ + uint64_t *cap_hdrs = cxl_dstate->caps_reg_state64; + const int cap_count = 3; + + /* CXL Device Capabilities Array Register */ + ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_ID, 0); + ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1); + ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count); + + cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1); + device_reg_init_common(cxl_dstate); + + cxl_device_cap_init(cxl_dstate, MAILBOX, 2); + mailbox_reg_init_common(cxl_dstate); + + cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000); + memdev_reg_init_common(cxl_dstate); + + assert(cxl_initialize_mailbox(cxl_dstate) == 0); +} diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c new file mode 100644 index 0000000000..24465a52ab --- /dev/null +++ b/hw/cxl/cxl-host-stubs.c @@ -0,0 +1,16 @@ +/* + * CXL host parameter parsing routine stubs + * + * Copyright (c) 2022 Huawei + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/cxl/cxl.h" + +void cxl_fixed_memory_window_config(MachineState *ms, + CXLFixedMemoryWindowOptions *object, + Error **errp) {}; + +void cxl_fixed_memory_window_link_targets(Error **errp) {}; + +const MemoryRegionOps cfmws_ops; diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c new file mode 100644 index 0000000000..469b3c4ced --- /dev/null +++ b/hw/cxl/cxl-host.c @@ -0,0 +1,222 @@ +/* + * CXL host parameter parsing routines + * + * Copyright (c) 2022 Huawei + * Modeled loosely on the NUMA options handling in hw/core/numa.c + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/bitmap.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "sysemu/qtest.h" +#include "hw/boards.h" + +#include "qapi/qapi-visit-machine.h" +#include "hw/cxl/cxl.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "hw/pci/pcie_port.h" + +void cxl_fixed_memory_window_config(MachineState *ms, + CXLFixedMemoryWindowOptions *object, + Error **errp) +{ + CXLFixedWindow *fw = g_malloc0(sizeof(*fw)); + strList *target; + int i; + + for (target = object->targets; target; target = target->next) { + fw->num_targets++; + } + + fw->enc_int_ways = cxl_interleave_ways_enc(fw->num_targets, errp); + if (*errp) { + return; + } + + fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets)); + for (i = 0, target = object->targets; target; i++, target = target->next) { + /* This link cannot be resolved yet, so stash the name for now */ + fw->targets[i] = g_strdup(target->value); + } + + if (object->size % (256 * MiB)) { + error_setg(errp, + "Size of a CXL fixed memory window must my a multiple of 256MiB"); + return; + } + fw->size = object->size; + + if (object->has_interleave_granularity) { + fw->enc_int_gran = + cxl_interleave_granularity_enc(object->interleave_granularity, + errp); + if (*errp) { + return; + } + } else { + /* Default to 256 byte interleave */ + fw->enc_int_gran = 0; + } + + ms->cxl_devices_state->fixed_windows = + g_list_append(ms->cxl_devices_state->fixed_windows, fw); + + return; +} + +void cxl_fixed_memory_window_link_targets(Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + + if (ms->cxl_devices_state && ms->cxl_devices_state->fixed_windows) { + GList *it; + + for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + int i; + + for (i = 0; i < fw->num_targets; i++) { + Object *o; + bool ambig; + + o = object_resolve_path_type(fw->targets[i], + TYPE_PXB_CXL_DEVICE, + &ambig); + if (!o) { + error_setg(errp, "Could not resolve CXLFM target %s", + fw->targets[i]); + return; + } + fw->target_hbs[i] = PXB_CXL_DEV(o); + } + } + } +} + +/* TODO: support, multiple hdm decoders */ +static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, + uint8_t *target) +{ + uint32_t ctrl; + uint32_t ig_enc; + uint32_t iw_enc; + uint32_t target_reg; + uint32_t target_idx; + + ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; + if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { + return false; + } + + ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); + iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); + target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); + + if (target_idx > 4) { + target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO]; + target_reg >>= target_idx * 8; + } else { + target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO]; + target_reg >>= (target_idx - 4) * 8; + } + *target = target_reg & 0xff; + + return true; +} + +static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) +{ + CXLComponentState *hb_cstate; + PCIHostState *hb; + int rb_index; + uint32_t *cache_mem; + uint8_t target; + bool target_found; + PCIDevice *rp, *d; + + /* Address is relative to memory region. Convert to HPA */ + addr += fw->base; + + rb_index = (addr / cxl_decode_ig(fw->enc_int_gran)) % fw->num_targets; + hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl.cxl_host_bridge); + if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) { + return NULL; + } + + hb_cstate = cxl_get_hb_cstate(hb); + if (!hb_cstate) { + return NULL; + } + + cache_mem = hb_cstate->crb.cache_mem_registers; + + target_found = cxl_hdm_find_target(cache_mem, addr, &target); + if (!target_found) { + return NULL; + } + + rp = pcie_find_port_by_pn(hb->bus, target); + if (!rp) { + return NULL; + } + + d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0]; + + if (!d || !object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { + return NULL; + } + + return d; +} + +static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + CXLFixedWindow *fw = opaque; + PCIDevice *d; + + d = cxl_cfmws_find_device(fw, addr); + if (d == NULL) { + *data = 0; + /* Reads to invalid address return poison */ + return MEMTX_ERROR; + } + + return cxl_type3_read(d, addr + fw->base, data, size, attrs); +} + +static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr, + uint64_t data, unsigned size, + MemTxAttrs attrs) +{ + CXLFixedWindow *fw = opaque; + PCIDevice *d; + + d = cxl_cfmws_find_device(fw, addr); + if (d == NULL) { + /* Writes to invalid address are silent */ + return MEMTX_OK; + } + + return cxl_type3_write(d, addr + fw->base, data, size, attrs); +} + +const MemoryRegionOps cfmws_ops = { + .read_with_attrs = cxl_read_cfmws, + .write_with_attrs = cxl_write_cfmws, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, +}; diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c new file mode 100644 index 0000000000..bb66c765a5 --- /dev/null +++ b/hw/cxl/cxl-mailbox-utils.c @@ -0,0 +1,478 @@ +/* + * CXL Utility library for mailbox interface + * + * Copyright(C) 2020 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/cxl/cxl.h" +#include "hw/pci/pci.h" +#include "qemu/cutils.h" +#include "qemu/log.h" +#include "qemu/uuid.h" + +/* + * How to add a new command, example. The command set FOO, with cmd BAR. + * 1. Add the command set and cmd to the enum. + * FOO = 0x7f, + * #define BAR 0 + * 2. Implement the handler + * static ret_code cmd_foo_bar(struct cxl_cmd *cmd, + * CXLDeviceState *cxl_dstate, uint16_t *len) + * 3. Add the command to the cxl_cmd_set[][] + * [FOO][BAR] = { "FOO_BAR", cmd_foo_bar, x, y }, + * 4. Implement your handler + * define_mailbox_handler(FOO_BAR) { ... return CXL_MBOX_SUCCESS; } + * + * + * Writing the handler: + * The handler will provide the &struct cxl_cmd, the &CXLDeviceState, and the + * in/out length of the payload. The handler is responsible for consuming the + * payload from cmd->payload and operating upon it as necessary. It must then + * fill the output data into cmd->payload (overwriting what was there), + * setting the length, and returning a valid return code. + * + * XXX: The handler need not worry about endianess. The payload is read out of + * a register interface that already deals with it. + */ + +enum { + EVENTS = 0x01, + #define GET_RECORDS 0x0 + #define CLEAR_RECORDS 0x1 + #define GET_INTERRUPT_POLICY 0x2 + #define SET_INTERRUPT_POLICY 0x3 + FIRMWARE_UPDATE = 0x02, + #define GET_INFO 0x0 + TIMESTAMP = 0x03, + #define GET 0x0 + #define SET 0x1 + LOGS = 0x04, + #define GET_SUPPORTED 0x0 + #define GET_LOG 0x1 + IDENTIFY = 0x40, + #define MEMORY_DEVICE 0x0 + CCLS = 0x41, + #define GET_PARTITION_INFO 0x0 + #define GET_LSA 0x2 + #define SET_LSA 0x3 +}; + +/* 8.2.8.4.5.1 Command Return Codes */ +typedef enum { + CXL_MBOX_SUCCESS = 0x0, + CXL_MBOX_BG_STARTED = 0x1, + CXL_MBOX_INVALID_INPUT = 0x2, + CXL_MBOX_UNSUPPORTED = 0x3, + CXL_MBOX_INTERNAL_ERROR = 0x4, + CXL_MBOX_RETRY_REQUIRED = 0x5, + CXL_MBOX_BUSY = 0x6, + CXL_MBOX_MEDIA_DISABLED = 0x7, + CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8, + CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9, + CXL_MBOX_FW_AUTH_FAILED = 0xa, + CXL_MBOX_FW_INVALID_SLOT = 0xb, + CXL_MBOX_FW_ROLLEDBACK = 0xc, + CXL_MBOX_FW_REST_REQD = 0xd, + CXL_MBOX_INVALID_HANDLE = 0xe, + CXL_MBOX_INVALID_PA = 0xf, + CXL_MBOX_INJECT_POISON_LIMIT = 0x10, + CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11, + CXL_MBOX_ABORTED = 0x12, + CXL_MBOX_INVALID_SECURITY_STATE = 0x13, + CXL_MBOX_INCORRECT_PASSPHRASE = 0x14, + CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, + CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, + CXL_MBOX_MAX = 0x17 +} ret_code; + +struct cxl_cmd; +typedef ret_code (*opcode_handler)(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, uint16_t *len); +struct cxl_cmd { + const char *name; + opcode_handler handler; + ssize_t in; + uint16_t effect; /* Reported in CEL */ + uint8_t *payload; +}; + +#define DEFINE_MAILBOX_HANDLER_ZEROED(name, size) \ + uint16_t __zero##name = size; \ + static ret_code cmd_##name(struct cxl_cmd *cmd, \ + CXLDeviceState *cxl_dstate, uint16_t *len) \ + { \ + *len = __zero##name; \ + memset(cmd->payload, 0, *len); \ + return CXL_MBOX_SUCCESS; \ + } +#define DEFINE_MAILBOX_HANDLER_NOP(name) \ + static ret_code cmd_##name(struct cxl_cmd *cmd, \ + CXLDeviceState *cxl_dstate, uint16_t *len) \ + { \ + return CXL_MBOX_SUCCESS; \ + } + +DEFINE_MAILBOX_HANDLER_ZEROED(events_get_records, 0x20); +DEFINE_MAILBOX_HANDLER_NOP(events_clear_records); +DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4); +DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy); + +/* 8.2.9.2.1 */ +static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint8_t slots_supported; + uint8_t slot_info; + uint8_t caps; + uint8_t rsvd[0xd]; + char fw_rev1[0x10]; + char fw_rev2[0x10]; + char fw_rev3[0x10]; + char fw_rev4[0x10]; + } QEMU_PACKED *fw_info; + QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); + + if (cxl_dstate->pmem_size < (256 << 20)) { + return CXL_MBOX_INTERNAL_ERROR; + } + + fw_info = (void *)cmd->payload; + memset(fw_info, 0, sizeof(*fw_info)); + + fw_info->slots_supported = 2; + fw_info->slot_info = BIT(0) | BIT(3); + fw_info->caps = 0; + pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0"); + + *len = sizeof(*fw_info); + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.3.1 */ +static ret_code cmd_timestamp_get(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + uint64_t time, delta; + uint64_t final_time = 0; + + if (cxl_dstate->timestamp.set) { + /* First find the delta from the last time the host set the time. */ + time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + delta = time - cxl_dstate->timestamp.last_set; + final_time = cxl_dstate->timestamp.host_set + delta; + } + + /* Then adjust the actual time */ + stq_le_p(cmd->payload, final_time); + *len = 8; + + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.3.2 */ +static ret_code cmd_timestamp_set(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + cxl_dstate->timestamp.set = true; + cxl_dstate->timestamp.last_set = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + cxl_dstate->timestamp.host_set = le64_to_cpu(*(uint64_t *)cmd->payload); + + *len = 0; + return CXL_MBOX_SUCCESS; +} + +static QemuUUID cel_uuid; + +/* 8.2.9.4.1 */ +static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint16_t entries; + uint8_t rsvd[6]; + struct { + QemuUUID uuid; + uint32_t size; + } log_entries[1]; + } QEMU_PACKED *supported_logs = (void *)cmd->payload; + QEMU_BUILD_BUG_ON(sizeof(*supported_logs) != 0x1c); + + supported_logs->entries = 1; + supported_logs->log_entries[0].uuid = cel_uuid; + supported_logs->log_entries[0].size = 4 * cxl_dstate->cel_size; + + *len = sizeof(*supported_logs); + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.4.2 */ +static ret_code cmd_logs_get_log(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + QemuUUID uuid; + uint32_t offset; + uint32_t length; + } QEMU_PACKED QEMU_ALIGNED(16) *get_log = (void *)cmd->payload; + + /* + * 8.2.9.4.2 + * The device shall return Invalid Parameter if the Offset or Length + * fields attempt to access beyond the size of the log as reported by Get + * Supported Logs. + * + * XXX: Spec is wrong, "Invalid Parameter" isn't a thing. + * XXX: Spec doesn't address incorrect UUID incorrectness. + * + * The CEL buffer is large enough to fit all commands in the emulation, so + * the only possible failure would be if the mailbox itself isn't big + * enough. + */ + if (get_log->offset + get_log->length > cxl_dstate->payload_size) { + return CXL_MBOX_INVALID_INPUT; + } + + if (!qemu_uuid_is_equal(&get_log->uuid, &cel_uuid)) { + return CXL_MBOX_UNSUPPORTED; + } + + /* Store off everything to local variables so we can wipe out the payload */ + *len = get_log->length; + + memmove(cmd->payload, cxl_dstate->cel_log + get_log->offset, + get_log->length); + + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.5.1.1 */ +static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + char fw_revision[0x10]; + uint64_t total_capacity; + uint64_t volatile_capacity; + uint64_t persistent_capacity; + uint64_t partition_align; + uint16_t info_event_log_size; + uint16_t warning_event_log_size; + uint16_t failure_event_log_size; + uint16_t fatal_event_log_size; + uint32_t lsa_size; + uint8_t poison_list_max_mer[3]; + uint16_t inject_poison_limit; + uint8_t poison_caps; + uint8_t qos_telemetry_caps; + } QEMU_PACKED *id; + QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43); + + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + uint64_t size = cxl_dstate->pmem_size; + + if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + return CXL_MBOX_INTERNAL_ERROR; + } + + id = (void *)cmd->payload; + memset(id, 0, sizeof(*id)); + + /* PMEM only */ + snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); + + id->total_capacity = size / (256 << 20); + id->persistent_capacity = size / (256 << 20); + id->lsa_size = cvc->get_lsa_size(ct3d); + + *len = sizeof(*id); + return CXL_MBOX_SUCCESS; +} + +static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint64_t active_vmem; + uint64_t active_pmem; + uint64_t next_vmem; + uint64_t next_pmem; + } QEMU_PACKED *part_info = (void *)cmd->payload; + QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20); + uint64_t size = cxl_dstate->pmem_size; + + if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + return CXL_MBOX_INTERNAL_ERROR; + } + + /* PMEM only */ + part_info->active_vmem = 0; + part_info->next_vmem = 0; + part_info->active_pmem = size / (256 << 20); + part_info->next_pmem = 0; + + *len = sizeof(*part_info); + return CXL_MBOX_SUCCESS; +} + +static ret_code cmd_ccls_get_lsa(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint32_t offset; + uint32_t length; + } QEMU_PACKED *get_lsa; + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + uint32_t offset, length; + + get_lsa = (void *)cmd->payload; + offset = get_lsa->offset; + length = get_lsa->length; + + if (offset + length > cvc->get_lsa_size(ct3d)) { + *len = 0; + return CXL_MBOX_INVALID_INPUT; + } + + *len = cvc->get_lsa(ct3d, get_lsa, length, offset); + return CXL_MBOX_SUCCESS; +} + +static ret_code cmd_ccls_set_lsa(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct set_lsa_pl { + uint32_t offset; + uint32_t rsvd; + uint8_t data[]; + } QEMU_PACKED; + struct set_lsa_pl *set_lsa_payload = (void *)cmd->payload; + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + const size_t hdr_len = offsetof(struct set_lsa_pl, data); + uint16_t plen = *len; + + *len = 0; + if (!plen) { + return CXL_MBOX_SUCCESS; + } + + if (set_lsa_payload->offset + plen > cvc->get_lsa_size(ct3d) + hdr_len) { + return CXL_MBOX_INVALID_INPUT; + } + plen -= hdr_len; + + cvc->set_lsa(ct3d, set_lsa_payload->data, plen, set_lsa_payload->offset); + return CXL_MBOX_SUCCESS; +} + +#define IMMEDIATE_CONFIG_CHANGE (1 << 1) +#define IMMEDIATE_DATA_CHANGE (1 << 2) +#define IMMEDIATE_POLICY_CHANGE (1 << 3) +#define IMMEDIATE_LOG_CHANGE (1 << 4) + +static struct cxl_cmd cxl_cmd_set[256][256] = { + [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS", + cmd_events_get_records, 1, 0 }, + [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", + cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE }, + [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY", + cmd_events_get_interrupt_policy, 0, 0 }, + [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY", + cmd_events_set_interrupt_policy, 4, IMMEDIATE_CONFIG_CHANGE }, + [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", + cmd_firmware_update_get_info, 0, 0 }, + [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, + [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, IMMEDIATE_POLICY_CHANGE }, + [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, + [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, + [IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE", + cmd_identify_memory_device, 0, 0 }, + [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO", + cmd_ccls_get_partition_info, 0, 0 }, + [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 }, + [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, + ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, +}; + +void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +{ + uint16_t ret = CXL_MBOX_SUCCESS; + struct cxl_cmd *cxl_cmd; + uint64_t status_reg; + opcode_handler h; + uint64_t command_reg = cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD]; + + uint8_t set = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET); + uint8_t cmd = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND); + uint16_t len = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH); + cxl_cmd = &cxl_cmd_set[set][cmd]; + h = cxl_cmd->handler; + if (h) { + if (len == cxl_cmd->in) { + cxl_cmd->payload = cxl_dstate->mbox_reg_state + + A_CXL_DEV_CMD_PAYLOAD; + ret = (*h)(cxl_cmd, cxl_dstate, &len); + assert(len <= cxl_dstate->payload_size); + } else { + ret = CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + } else { + qemu_log_mask(LOG_UNIMP, "Command %04xh not implemented\n", + set << 8 | cmd); + ret = CXL_MBOX_UNSUPPORTED; + } + + /* Set the return code */ + status_reg = FIELD_DP64(0, CXL_DEV_MAILBOX_STS, ERRNO, ret); + + /* Set the return length */ + command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET, 0); + command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND, 0); + command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH, len); + + cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD] = command_reg; + cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_STS] = status_reg; + + /* Tell the host we're done */ + ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL, + DOORBELL, 0); +} + +int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) +{ + /* CXL 2.0: Table 169 Get Supported Logs Log Entry */ + const char *cel_uuidstr = "0da9c0b5-bf41-4b78-8f79-96b1623b3f17"; + + for (int set = 0; set < 256; set++) { + for (int cmd = 0; cmd < 256; cmd++) { + if (cxl_cmd_set[set][cmd].handler) { + struct cxl_cmd *c = &cxl_cmd_set[set][cmd]; + struct cel_log *log = + &cxl_dstate->cel_log[cxl_dstate->cel_size]; + + log->opcode = (set << 8) | cmd; + log->effect = c->effect; + cxl_dstate->cel_size++; + } + } + } + + return qemu_uuid_parse(cel_uuidstr, &cel_uuid); +} diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build new file mode 100644 index 0000000000..f117b99949 --- /dev/null +++ b/hw/cxl/meson.build @@ -0,0 +1,12 @@ +softmmu_ss.add(when: 'CONFIG_CXL', + if_true: files( + 'cxl-component-utils.c', + 'cxl-device-utils.c', + 'cxl-mailbox-utils.c', + 'cxl-host.c', + ), + if_false: files( + 'cxl-host-stubs.c', + )) + +softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl-host-stubs.c')) diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 09818231bd..96e56c4467 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -565,6 +565,12 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp) g->vhost_gpu_fd = -1; } +static struct vhost_dev *vhost_user_gpu_get_vhost(VirtIODevice *vdev) +{ + VhostUserGPU *g = VHOST_USER_GPU(vdev); + return &g->vhost->dev; +} + static Property vhost_user_gpu_properties[] = { VIRTIO_GPU_BASE_PROPERTIES(VhostUserGPU, parent_obj.conf), DEFINE_PROP_END_OF_LIST(), @@ -586,6 +592,7 @@ vhost_user_gpu_class_init(ObjectClass *klass, void *data) vdc->guest_notifier_pending = vhost_user_gpu_guest_notifier_pending; vdc->get_config = vhost_user_gpu_get_config; vdc->set_config = vhost_user_gpu_set_config; + vdc->get_vhost = vhost_user_gpu_get_vhost; device_class_set_props(dc, vhost_user_gpu_properties); } diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index fff0fb4a82..8ba5da4312 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -173,7 +173,7 @@ virtio_gpu_base_device_realize(DeviceState *qdev, } g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs); - virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU, + virtio_init(VIRTIO_DEVICE(g), VIRTIO_ID_GPU, sizeof(struct virtio_gpu_config)); if (virtio_gpu_virgl_enabled(g->conf)) { diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index 4d054ca869..d1e174b1f4 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -147,7 +147,7 @@ static FWCfgState *create_fw_cfg(MachineState *ms) fw_cfg_add_file(fw_cfg, "/etc/power-button-addr", g_memdup(&val, sizeof(val)), sizeof(val)); - fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ms->boot_order[0]); + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ms->boot_config.order[0]); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); return fw_cfg; @@ -391,8 +391,8 @@ static void machine_hppa_init(MachineState *machine) * mode (kernel_entry=1), and to boot from CD (gr[24]='d') * or hard disc * (gr[24]='c'). */ - kernel_entry = boot_menu ? 1 : 0; - cpu[0]->env.gr[24] = machine->boot_order[0]; + kernel_entry = machine->boot_config.has_menu ? machine->boot_config.menu : 0; + cpu[0]->env.gr[24] = machine->boot_config.order[0]; } /* We jump to the firmware entry routine and pass the diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index dcf6ece3d0..c125939ed6 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -28,6 +28,7 @@ #include "qemu/bitmap.h" #include "qemu/error-report.h" #include "hw/pci/pci.h" +#include "hw/cxl/cxl.h" #include "hw/core/cpu.h" #include "target/i386/cpu.h" #include "hw/misc/pvpanic.h" @@ -66,6 +67,7 @@ #include "hw/acpi/aml-build.h" #include "hw/acpi/utils.h" #include "hw/acpi/pci.h" +#include "hw/acpi/cxl.h" #include "qom/qom-qobject.h" #include "hw/i386/amd_iommu.h" @@ -75,6 +77,7 @@ #include "hw/acpi/ipmi.h" #include "hw/acpi/hmat.h" #include "hw/acpi/viot.h" +#include "hw/acpi/cxl.h" #include CONFIG_DEVICES @@ -1409,6 +1412,22 @@ static void build_smb0(Aml *table, I2CBus *smbus, int devnr, int func) aml_append(table, scope); } +static void build_acpi0017(Aml *table) +{ + Aml *dev, *scope, *method; + + scope = aml_scope("_SB"); + dev = aml_device("CXLM"); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017"))); + + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0x01))); + aml_append(dev, method); + + aml_append(scope, dev); + aml_append(table, scope); +} + static void build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, AcpiMiscInfo *misc, @@ -1428,6 +1447,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, #ifdef CONFIG_TPM TPMIf *tpm = tpm_find(); #endif + bool cxl_present = false; int i; VMBusBridge *vmbus_bridge = vmbus_bridge_find(); AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, @@ -1572,10 +1592,25 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } scope = aml_scope("\\_SB"); - dev = aml_device("PC%.02X", bus_num); + + if (pci_bus_is_cxl(bus)) { + dev = aml_device("CL%.02X", bus_num); + } else { + dev = aml_device("PC%.02X", bus_num); + } aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); - if (pci_bus_is_express(bus)) { + if (pci_bus_is_cxl(bus)) { + struct Aml *pkg = aml_package(2); + + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016"))); + aml_append(pkg, aml_eisaid("PNP0A08")); + aml_append(pkg, aml_eisaid("PNP0A03")); + aml_append(dev, aml_name_decl("_CID", pkg)); + aml_append(dev, aml_name_decl("_ADR", aml_int(0))); + aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); + build_cxl_osc_method(dev); + } else if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); @@ -1595,9 +1630,23 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); aml_append(dsdt, scope); + + /* Handle the ranges for the PXB expanders */ + if (pci_bus_is_cxl(bus)) { + MemoryRegion *mr = &machine->cxl_devices_state->host_mr; + uint64_t base = mr->addr; + + cxl_present = true; + crs_range_insert(crs_range_set.mem_ranges, base, + base + memory_region_size(mr) - 1); + } } } + if (cxl_present) { + build_acpi0017(dsdt); + } + /* * At this point crs_range_set has all the ranges used by pci * busses *other* than PCI0. These ranges will be excluded from @@ -2662,6 +2711,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) machine->nvdimms_state, machine->ram_slots, x86ms->oem_id, x86ms->oem_table_id); } + if (machine->cxl_devices_state->is_enabled) { + cxl_build_cedt(machine, table_offsets, tables_blob, tables->linker, + x86ms->oem_id, x86ms->oem_table_id); + } acpi_add_table(table_offsets, tables_blob); build_waet(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index ea8eaeb330..725f69095b 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -201,15 +201,18 @@ static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, /* * AMDVi event structure * 0:15 -> DeviceID - * 55:63 -> event type + miscellaneous info - * 63:127 -> related address + * 48:63 -> event type + miscellaneous info + * 64:127 -> related address */ static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, uint16_t info) { + evt[0] = 0; + evt[1] = 0; + amdvi_setevent_bits(evt, devid, 0, 16); - amdvi_setevent_bits(evt, info, 55, 8); - amdvi_setevent_bits(evt, addr, 63, 64); + amdvi_setevent_bits(evt, info, 48, 16); + amdvi_setevent_bits(evt, addr, 64, 64); } /* log an error encountered during a page walk * @@ -218,7 +221,7 @@ static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, static void amdvi_page_fault(AMDVIState *s, uint16_t devid, hwaddr addr, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; amdvi_encode_event(evt, devid, addr, info); @@ -234,7 +237,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid, static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, hwaddr devtab, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_DEV_TAB_HW_ERROR; @@ -248,7 +251,8 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, */ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) { - uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR; + uint64_t evt[2]; + uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR; amdvi_encode_event(evt, 0, addr, info); amdvi_log_event(s, evt); @@ -261,7 +265,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, hwaddr addr) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR; amdvi_encode_event(evt, 0, addr, info); @@ -276,7 +280,7 @@ static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, hwaddr addr, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY; amdvi_encode_event(evt, devid, addr, info); @@ -288,7 +292,7 @@ static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, hwaddr addr, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; amdvi_encode_event(evt, devid, addr, info); diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h index 2a0ceb8b9c..04f93780f9 100644 --- a/hw/i386/e820_memory_layout.h +++ b/hw/i386/e820_memory_layout.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: MIT */ -#ifndef HW_I386_E820_H -#define HW_I386_E820_H +#ifndef HW_I386_E820_MEMORY_LAYOUT_H +#define HW_I386_E820_MEMORY_LAYOUT_H /* e820 types */ #define E820_RAM 1 diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c64aa81a83..2162394e08 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -181,6 +181,18 @@ static void vtd_update_scalable_state(IntelIOMMUState *s) } } +static void vtd_update_iq_dw(IntelIOMMUState *s) +{ + uint64_t val = vtd_get_quad_raw(s, DMAR_IQA_REG); + + if (s->ecap & VTD_ECAP_SMTS && + val & VTD_IQA_DW_MASK) { + s->iq_dw = true; + } else { + s->iq_dw = false; + } +} + /* Whether the address space needs to notify new mappings */ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as) { @@ -469,11 +481,6 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, assert(fault < VTD_FR_MAX); - if (fault == VTD_FR_RESERVED_ERR) { - /* This is not a normal fault reason case. Drop it. */ - return; - } - trace_vtd_dmar_fault(source_id, fault, addr, is_write); if (fsts_reg & VTD_FSTS_PFO) { @@ -1025,6 +1032,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t offset; uint64_t slpte; uint64_t access_right_check; + uint64_t xlat, size; if (!vtd_iova_range_check(s, iova, ce, aw_bits)) { error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")", @@ -1069,11 +1077,33 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (vtd_is_last_slpte(slpte, level)) { *slptep = slpte; *slpte_level = level; - return 0; + break; } addr = vtd_get_slpte_addr(slpte, aw_bits); level--; } + + xlat = vtd_get_slpte_addr(*slptep, aw_bits); + size = ~vtd_slpt_level_page_mask(level) + 1; + + /* + * From VT-d spec 3.14: Untranslated requests and translation + * requests that result in an address in the interrupt range will be + * blocked with condition code LGN.4 or SGN.8. + */ + if ((xlat > VTD_INTERRUPT_ADDR_LAST || + xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) { + return 0; + } else { + error_report_once("%s: xlat address is in interrupt range " + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " + "slpte=0x%" PRIx64 ", write=%d, " + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")", + __func__, iova, level, slpte, is_write, + xlat, size); + return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : + -VTD_FR_INTERRUPT_ADDR; + } } typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private); @@ -1633,11 +1663,12 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_PAGING_ENTRY_INV] = true, [VTD_FR_ROOT_TABLE_INV] = false, [VTD_FR_CONTEXT_TABLE_INV] = false, + [VTD_FR_INTERRUPT_ADDR] = true, [VTD_FR_ROOT_ENTRY_RSVD] = false, [VTD_FR_PAGING_ENTRY_RSVD] = true, [VTD_FR_CONTEXT_ENTRY_TT] = true, [VTD_FR_PASID_TABLE_INV] = false, - [VTD_FR_RESERVED_ERR] = false, + [VTD_FR_SM_INTERRUPT_ADDR] = true, [VTD_FR_MAX] = false, }; @@ -2209,12 +2240,13 @@ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) /* Handle write to Global Command Register */ static void vtd_handle_gcmd_write(IntelIOMMUState *s) { + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG); uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG); uint32_t changed = status ^ val; trace_vtd_reg_write_gcmd(status, val); - if (changed & VTD_GCMD_TE) { + if ((changed & VTD_GCMD_TE) && s->dma_translation) { /* Translation enable/disable */ vtd_handle_gcmd_te(s, val & VTD_GCMD_TE); } @@ -2230,7 +2262,8 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) /* Set/update the interrupt remapping root-table pointer */ vtd_handle_gcmd_sirtp(s); } - if (changed & VTD_GCMD_IRE) { + if ((changed & VTD_GCMD_IRE) && + x86_iommu_ir_supported(x86_iommu)) { /* Interrupt remap enable/disable */ vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE); } @@ -2883,12 +2916,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr, } else { vtd_set_quad(s, addr, val); } - if (s->ecap & VTD_ECAP_SMTS && - val & VTD_IQA_DW_MASK) { - s->iq_dw = true; - } else { - s->iq_dw = false; - } + vtd_update_iq_dw(s); break; case DMAR_IQA_REG_HI: @@ -3032,7 +3060,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, /* TODO: add support for VFIO and vhost users */ if (s->snoop_control) { - error_setg_errno(errp, -ENOTSUP, + error_setg_errno(errp, ENOTSUP, "Snoop Control with vhost or VFIO is not supported"); return -ENOTSUP; } @@ -3053,13 +3081,6 @@ static int vtd_post_load(void *opaque, int version_id) IntelIOMMUState *iommu = opaque; /* - * Memory regions are dynamically turned on/off depending on - * context entry configurations from the guest. After migration, - * we need to make sure the memory regions are still correct. - */ - vtd_switch_address_space_all(iommu); - - /* * We don't need to migrate the root_scalable because we can * simply do the calculation after the loading is complete. We * can actually do similar things with root, dmar_enabled, etc. @@ -3068,6 +3089,15 @@ static int vtd_post_load(void *opaque, int version_id) */ vtd_update_scalable_state(iommu); + vtd_update_iq_dw(iommu); + + /* + * Memory regions are dynamically turned on/off depending on + * context entry configurations from the guest. After migration, + * we need to make sure the memory regions are still correct. + */ + vtd_switch_address_space_all(iommu); + return 0; } @@ -3122,6 +3152,7 @@ static Property vtd_properties[] = { DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), + DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_END_OF_LIST(), }; @@ -3627,12 +3658,17 @@ static void vtd_init(IntelIOMMUState *s) s->next_frcd_reg = 0; s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | - VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits); + VTD_CAP_MGAW(s->aw_bits); if (s->dma_drain) { s->cap |= VTD_CAP_DRAIN; } - if (s->aw_bits == VTD_HOST_AW_48BIT) { - s->cap |= VTD_CAP_SAGAW_48bit; + if (s->dma_translation) { + if (s->aw_bits >= VTD_HOST_AW_39BIT) { + s->cap |= VTD_CAP_SAGAW_39bit; + } + if (s->aw_bits >= VTD_HOST_AW_48BIT) { + s->cap |= VTD_CAP_SAGAW_48bit; + } } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; @@ -3778,15 +3814,10 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) { - if (!kvm_irqchip_in_kernel()) { + if (!kvm_irqchip_is_split()) { error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); return false; } - if (!kvm_enable_x2apic()) { - error_setg(errp, "eim=on requires support on the KVM side" - "(X2APIC_API, first shipped in v4.7)"); - return false; - } } /* Currently only address widths supported are 39 and 48 bits */ diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 1ff13b40f9..930ce61feb 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -289,6 +289,8 @@ typedef enum VTDFaultReason { * context-entry. */ VTD_FR_CONTEXT_ENTRY_TT, + /* Output address in the interrupt address range */ + VTD_FR_INTERRUPT_ADDR = 0xE, /* Interrupt remapping transition faults */ VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved @@ -304,11 +306,8 @@ typedef enum VTDFaultReason { VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */ - /* This is not a normal fault reason. We use this to indicate some faults - * that are not referenced by the VT-d specification. - * Fault event with such reason should not be recorded. - */ - VTD_FR_RESERVED_ERR, + /* Output address in the interrupt address range for scalable mode */ + VTD_FR_SM_INTERRUPT_ADDR = 0x87, VTD_FR_MAX, /* Guard */ } VTDFaultReason; diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 4b3b1dd262..754f1d0593 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -247,7 +247,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) x86ms->pci_irq_mask = 0; } - if (mms->pic == ON_OFF_AUTO_ON || mms->pic == ON_OFF_AUTO_AUTO) { + if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { qemu_irq *i8259; i8259 = i8259_init(isa_bus, x86_allocate_cpu_irq()); @@ -257,7 +257,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) g_free(i8259); } - if (mms->pit == ON_OFF_AUTO_ON || mms->pit == ON_OFF_AUTO_AUTO) { + if (x86ms->pit == ON_OFF_AUTO_ON || x86ms->pit == ON_OFF_AUTO_AUTO) { if (kvm_pit_in_kernel()) { kvm_pit_init(isa_bus, 0x40); } else { @@ -491,40 +491,6 @@ static void microvm_machine_reset(MachineState *machine) } } -static void microvm_machine_get_pic(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - OnOffAuto pic = mms->pic; - - visit_type_OnOffAuto(v, name, &pic, errp); -} - -static void microvm_machine_set_pic(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - - visit_type_OnOffAuto(v, name, &mms->pic, errp); -} - -static void microvm_machine_get_pit(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - OnOffAuto pit = mms->pit; - - visit_type_OnOffAuto(v, name, &pit, errp); -} - -static void microvm_machine_set_pit(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - - visit_type_OnOffAuto(v, name, &mms->pit, errp); -} - static void microvm_machine_get_rtc(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -649,8 +615,6 @@ static void microvm_machine_initfn(Object *obj) MicrovmMachineState *mms = MICROVM_MACHINE(obj); /* Configuration */ - mms->pic = ON_OFF_AUTO_AUTO; - mms->pit = ON_OFF_AUTO_AUTO; mms->rtc = ON_OFF_AUTO_AUTO; mms->pcie = ON_OFF_AUTO_AUTO; mms->ioapic2 = ON_OFF_AUTO_AUTO; @@ -702,20 +666,6 @@ static void microvm_class_init(ObjectClass *oc, void *data) x86mc->fwcfg_dma_enabled = true; - object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto", - microvm_machine_get_pic, - microvm_machine_set_pic, - NULL, NULL); - object_class_property_set_description(oc, MICROVM_MACHINE_PIC, - "Enable i8259 PIC"); - - object_class_property_add(oc, MICROVM_MACHINE_PIT, "OnOffAuto", - microvm_machine_get_pit, - microvm_machine_set_pit, - NULL, NULL); - object_class_property_set_description(oc, MICROVM_MACHINE_PIT, - "Enable i8254 PIT"); - object_class_property_add(oc, MICROVM_MACHINE_RTC, "OnOffAuto", microvm_machine_get_rtc, microvm_machine_set_rtc, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 23bba9d82c..7c39c91335 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -75,6 +75,7 @@ #include "acpi-build.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" +#include "hw/cxl/cxl.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" #include "qapi/qapi-visit-machine.h" @@ -675,7 +676,7 @@ void pc_cmos_init(PCMachineState *pcms, object_property_set_link(OBJECT(pcms), "rtc_state", OBJECT(s), &error_abort); - set_boot_dev(s, MACHINE(pcms)->boot_order, &error_fatal); + set_boot_dev(s, MACHINE(pcms)->boot_config.order, &error_fatal); val = 0; val |= 0x02; /* FPU is there */ @@ -743,14 +744,6 @@ void pc_machine_done(Notifier *notifier, void *data) /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); } - - - if (x86ms->apic_id_limit > 255 && !xen_enabled() && - !kvm_irqchip_in_kernel()) { - error_report("current -smp configuration requires kernel " - "irqchip support."); - exit(EXIT_FAILURE); - } } void pc_guest_info_init(PCMachineState *pcms) @@ -816,6 +809,7 @@ void pc_memory_init(PCMachineState *pcms, MachineClass *mc = MACHINE_GET_CLASS(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(pcms); + hwaddr cxl_base, cxl_resv_end = 0; assert(machine->ram_size == x86ms->below_4g_mem_size + x86ms->above_4g_mem_size); @@ -905,6 +899,44 @@ void pc_memory_init(PCMachineState *pcms, &machine->device_memory->mr); } + if (machine->cxl_devices_state->is_enabled) { + MemoryRegion *mr = &machine->cxl_devices_state->host_mr; + hwaddr cxl_size = MiB; + + if (pcmc->has_reserved_memory && machine->device_memory->base) { + cxl_base = machine->device_memory->base; + if (!pcmc->broken_reserved_end) { + cxl_base += memory_region_size(&machine->device_memory->mr); + } + } else if (pcms->sgx_epc.size != 0) { + cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc); + } else { + cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size; + } + + e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; + if (machine->cxl_devices_state->fixed_windows) { + hwaddr cxl_fmw_base; + GList *it; + + cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB); + for (it = machine->cxl_devices_state->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + + fw->base = cxl_fmw_base; + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); + e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + } + } + /* Initialize PC system firmware */ pc_system_firmware_init(pcms, rom_memory); @@ -932,6 +964,10 @@ void pc_memory_init(PCMachineState *pcms, if (!pcmc->broken_reserved_end) { res_mem_end += memory_region_size(&machine->device_memory->mr); } + + if (machine->cxl_devices_state->is_enabled) { + res_mem_end = cxl_resv_end; + } *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); } @@ -965,7 +1001,17 @@ uint64_t pc_pci_hole64_start(void) X86MachineState *x86ms = X86_MACHINE(pcms); uint64_t hole64_start = 0; - if (pcmc->has_reserved_memory && ms->device_memory->base) { + if (ms->cxl_devices_state->host_mr.addr) { + hole64_start = ms->cxl_devices_state->host_mr.addr + + memory_region_size(&ms->cxl_devices_state->host_mr); + if (ms->cxl_devices_state->fixed_windows) { + GList *it; + for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + hole64_start = fw->mr.addr + memory_region_size(&fw->mr); + } + } + } else if (pcmc->has_reserved_memory && ms->device_memory->base) { hole64_start = ms->device_memory->base; if (!pcmc->broken_reserved_end) { hole64_start += memory_region_size(&ms->device_memory->mr); @@ -1077,6 +1123,7 @@ void pc_basic_device_init(struct PCMachineState *pcms, ISADevice *pit = NULL; MemoryRegion *ioport80_io = g_new(MemoryRegion, 1); MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1); + X86MachineState *x86ms = X86_MACHINE(pcms); memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1); memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io); @@ -1121,7 +1168,8 @@ void pc_basic_device_init(struct PCMachineState *pcms, qemu_register_boot_set(pc_boot_set, *rtc_state); - if (!xen_enabled() && pcms->pit_enabled) { + if (!xen_enabled() && + (x86ms->pit == ON_OFF_AUTO_AUTO || x86ms->pit == ON_OFF_AUTO_ON)) { if (kvm_pit_in_kernel()) { pit = kvm_pit_init(isa_bus, 0x40); } else { @@ -1491,20 +1539,6 @@ static void pc_machine_set_sata(Object *obj, bool value, Error **errp) pcms->sata_enabled = value; } -static bool pc_machine_get_pit(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return pcms->pit_enabled; -} - -static void pc_machine_set_pit(Object *obj, bool value, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - pcms->pit_enabled = value; -} - static bool pc_machine_get_hpet(Object *obj, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); @@ -1661,7 +1695,6 @@ static void pc_machine_initfn(Object *obj) pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; pcms->smbus_enabled = true; pcms->sata_enabled = true; - pcms->pit_enabled = true; pcms->i8042_enabled = true; pcms->max_fw_size = 8 * MiB; #ifdef CONFIG_HPET @@ -1761,6 +1794,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE; mc->nvdimm_supported = true; mc->smp_props.dies_supported = true; + mc->cxl_supported = true; mc->default_ram_id = "pc.ram"; object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", @@ -1789,11 +1823,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, PC_MACHINE_SATA, "Enable/disable Serial ATA bus"); - object_class_property_add_bool(oc, PC_MACHINE_PIT, - pc_machine_get_pit, pc_machine_set_pit); - object_class_property_set_description(oc, PC_MACHINE_PIT, - "Enable/disable Intel 8254 programmable interval timer emulation"); - object_class_property_add_bool(oc, "hpet", pc_machine_get_hpet, pc_machine_set_hpet); object_class_property_set_description(oc, "hpet", diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 4c185c72d0..578e537b35 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -218,7 +218,9 @@ static void pc_init1(MachineState *machine, } isa_bus_irqs(isa_bus, x86ms->gsi); - pc_i8259_create(isa_bus, gsi_state->i8259_irq); + if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { + pc_i8259_create(isa_bus, gsi_state->i8259_irq); + } if (pcmc->pci_enabled) { ioapic_init_gsi(gsi_state, "i440fx"); @@ -813,124 +815,6 @@ static void pc_i440fx_1_4_machine_options(MachineClass *m) DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4_fn, pc_i440fx_1_4_machine_options); -typedef struct { - uint16_t gpu_device_id; - uint16_t pch_device_id; - uint8_t pch_revision_id; -} IGDDeviceIDInfo; - -/* In real world different GPU should have different PCH. But actually - * the different PCH DIDs likely map to different PCH SKUs. We do the - * same thing for the GPU. For PCH, the different SKUs are going to be - * all the same silicon design and implementation, just different - * features turn on and off with fuses. The SW interfaces should be - * consistent across all SKUs in a given family (eg LPT). But just same - * features may not be supported. - * - * Most of these different PCH features probably don't matter to the - * Gfx driver, but obviously any difference in display port connections - * will so it should be fine with any PCH in case of passthrough. - * - * So currently use one PCH version, 0x8c4e, to cover all HSW(Haswell) - * scenarios, 0x9cc3 for BDW(Broadwell). - */ -static const IGDDeviceIDInfo igd_combo_id_infos[] = { - /* HSW Classic */ - {0x0402, 0x8c4e, 0x04}, /* HSWGT1D, HSWD_w7 */ - {0x0406, 0x8c4e, 0x04}, /* HSWGT1M, HSWM_w7 */ - {0x0412, 0x8c4e, 0x04}, /* HSWGT2D, HSWD_w7 */ - {0x0416, 0x8c4e, 0x04}, /* HSWGT2M, HSWM_w7 */ - {0x041E, 0x8c4e, 0x04}, /* HSWGT15D, HSWD_w7 */ - /* HSW ULT */ - {0x0A06, 0x8c4e, 0x04}, /* HSWGT1UT, HSWM_w7 */ - {0x0A16, 0x8c4e, 0x04}, /* HSWGT2UT, HSWM_w7 */ - {0x0A26, 0x8c4e, 0x06}, /* HSWGT3UT, HSWM_w7 */ - {0x0A2E, 0x8c4e, 0x04}, /* HSWGT3UT28W, HSWM_w7 */ - {0x0A1E, 0x8c4e, 0x04}, /* HSWGT2UX, HSWM_w7 */ - {0x0A0E, 0x8c4e, 0x04}, /* HSWGT1ULX, HSWM_w7 */ - /* HSW CRW */ - {0x0D26, 0x8c4e, 0x04}, /* HSWGT3CW, HSWM_w7 */ - {0x0D22, 0x8c4e, 0x04}, /* HSWGT3CWDT, HSWD_w7 */ - /* HSW Server */ - {0x041A, 0x8c4e, 0x04}, /* HSWSVGT2, HSWD_w7 */ - /* HSW SRVR */ - {0x040A, 0x8c4e, 0x04}, /* HSWSVGT1, HSWD_w7 */ - /* BSW */ - {0x1606, 0x9cc3, 0x03}, /* BDWULTGT1, BDWM_w7 */ - {0x1616, 0x9cc3, 0x03}, /* BDWULTGT2, BDWM_w7 */ - {0x1626, 0x9cc3, 0x03}, /* BDWULTGT3, BDWM_w7 */ - {0x160E, 0x9cc3, 0x03}, /* BDWULXGT1, BDWM_w7 */ - {0x161E, 0x9cc3, 0x03}, /* BDWULXGT2, BDWM_w7 */ - {0x1602, 0x9cc3, 0x03}, /* BDWHALOGT1, BDWM_w7 */ - {0x1612, 0x9cc3, 0x03}, /* BDWHALOGT2, BDWM_w7 */ - {0x1622, 0x9cc3, 0x03}, /* BDWHALOGT3, BDWM_w7 */ - {0x162B, 0x9cc3, 0x03}, /* BDWHALO28W, BDWM_w7 */ - {0x162A, 0x9cc3, 0x03}, /* BDWGT3WRKS, BDWM_w7 */ - {0x162D, 0x9cc3, 0x03}, /* BDWGT3SRVR, BDWM_w7 */ -}; - -static void isa_bridge_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - - dc->desc = "ISA bridge faked to support IGD PT"; - set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - k->vendor_id = PCI_VENDOR_ID_INTEL; - k->class_id = PCI_CLASS_BRIDGE_ISA; -}; - -static const TypeInfo isa_bridge_info = { - .name = "igd-passthrough-isa-bridge", - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PCIDevice), - .class_init = isa_bridge_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, -}; - -static void pt_graphics_register_types(void) -{ - type_register_static(&isa_bridge_info); -} -type_init(pt_graphics_register_types) - -void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id) -{ - struct PCIDevice *bridge_dev; - int i, num; - uint16_t pch_dev_id = 0xffff; - uint8_t pch_rev_id = 0; - - num = ARRAY_SIZE(igd_combo_id_infos); - for (i = 0; i < num; i++) { - if (gpu_dev_id == igd_combo_id_infos[i].gpu_device_id) { - pch_dev_id = igd_combo_id_infos[i].pch_device_id; - pch_rev_id = igd_combo_id_infos[i].pch_revision_id; - } - } - - if (pch_dev_id == 0xffff) { - return; - } - - /* Currently IGD drivers always need to access PCH by 1f.0. */ - bridge_dev = pci_create_simple(bus, PCI_DEVFN(0x1f, 0), - "igd-passthrough-isa-bridge"); - - /* - * Note that vendor id is always PCI_VENDOR_ID_INTEL. - */ - if (!bridge_dev) { - fprintf(stderr, "set igd-passthrough-isa-bridge failed!\n"); - return; - } - pci_config_set_device_id(bridge_dev->config, pch_dev_id); - pci_config_set_revision(bridge_dev->config, pch_rev_id); -} - #ifdef CONFIG_ISAPC static void isapc_machine_options(MachineClass *m) { diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 302288342a..42eb8b9707 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -265,7 +265,9 @@ static void pc_q35_init(MachineState *machine) pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq); isa_bus = ich9_lpc->isa_bus; - pc_i8259_create(isa_bus, gsi_state->i8259_irq); + if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { + pc_i8259_create(isa_bus, gsi_state->i8259_irq); + } if (pcmc->pci_enabled) { ioapic_init_gsi(gsi_state, "q35"); diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 79ebdface6..78b05ab7a2 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -38,6 +38,7 @@ #include "sysemu/replay.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-timers.h" +#include "sysemu/xen.h" #include "trace.h" #include "hw/i386/x86.h" @@ -122,6 +123,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) */ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ms->smp.max_cpus - 1) + 1; + + /* + * Can we support APIC ID 255 or higher? + * + * Under Xen: yes. + * With userspace emulated lapic: no + * With KVM's in-kernel lapic: only if X2APIC API is enabled. + */ + if (x86ms->apic_id_limit > 255 && !xen_enabled() && + (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) { + error_report("current -smp configuration requires kernel " + "irqchip and X2APIC API support."); + exit(EXIT_FAILURE); + } + possible_cpus = mc->possible_cpu_arch_ids(ms); for (i = 0; i < ms->smp.cpus; i++) { x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); @@ -1228,6 +1244,40 @@ static void x86_machine_set_acpi(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &x86ms->acpi, errp); } +static void x86_machine_get_pit(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + OnOffAuto pit = x86ms->pit; + + visit_type_OnOffAuto(v, name, &pit, errp); +} + +static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj);; + + visit_type_OnOffAuto(v, name, &x86ms->pit, errp); +} + +static void x86_machine_get_pic(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + OnOffAuto pic = x86ms->pic; + + visit_type_OnOffAuto(v, name, &pic, errp); +} + +static void x86_machine_set_pic(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + + visit_type_OnOffAuto(v, name, &x86ms->pic, errp); +} + static char *x86_machine_get_oem_id(Object *obj, Error **errp) { X86MachineState *x86ms = X86_MACHINE(obj); @@ -1317,6 +1367,8 @@ static void x86_machine_initfn(Object *obj) x86ms->smm = ON_OFF_AUTO_AUTO; x86ms->acpi = ON_OFF_AUTO_AUTO; + x86ms->pit = ON_OFF_AUTO_AUTO; + x86ms->pic = ON_OFF_AUTO_AUTO; x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS; x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); @@ -1348,6 +1400,20 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, X86_MACHINE_ACPI, "Enable ACPI"); + object_class_property_add(oc, X86_MACHINE_PIT, "OnOffAuto", + x86_machine_get_pit, + x86_machine_set_pit, + NULL, NULL); + object_class_property_set_description(oc, X86_MACHINE_PIT, + "Enable i8254 PIT"); + + object_class_property_add(oc, X86_MACHINE_PIC, "OnOffAuto", + x86_machine_get_pic, + x86_machine_set_pic, + NULL, NULL); + object_class_property_set_description(oc, X86_MACHINE_PIC, + "Enable i8259 PIC"); + object_class_property_add_str(oc, X86_MACHINE_OEM_ID, x86_machine_get_oem_id, x86_machine_set_oem_id); diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c index aeb0624fe5..1352e372ff 100644 --- a/hw/input/vhost-user-input.c +++ b/hw/input/vhost-user-input.c @@ -78,6 +78,12 @@ static void vhost_input_set_config(VirtIODevice *vdev, virtio_notify_config(vdev); } +static struct vhost_dev *vhost_input_get_vhost(VirtIODevice *vdev) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + return &vhi->vhost->dev; +} + static const VMStateDescription vmstate_vhost_input = { .name = "vhost-user-input", .unmigratable = 1, @@ -92,6 +98,7 @@ static void vhost_input_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_vhost_input; vdc->get_config = vhost_input_get_config; vdc->set_config = vhost_input_set_config; + vdc->get_vhost = vhost_input_get_vhost; vic->realize = vhost_input_realize; vic->change_active = vhost_input_change_active; } diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c index 54bcb46c74..5b5398b3ca 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -257,8 +257,7 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp) vinput->cfg_size += 8; assert(vinput->cfg_size <= sizeof(virtio_input_config)); - virtio_init(vdev, "virtio-input", VIRTIO_ID_INPUT, - vinput->cfg_size); + virtio_init(vdev, VIRTIO_ID_INPUT, vinput->cfg_size); vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt); vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts); } diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index 03dbb3c7df..73c5ae8ad9 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -11,3 +11,8 @@ config NVDIMM config SPARSE_MEM bool + +config CXL_MEM_DEVICE + bool + default y if CXL + select MEM_DEVICE diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c new file mode 100644 index 0000000000..3bf2869573 --- /dev/null +++ b/hw/mem/cxl_type3.c @@ -0,0 +1,371 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "hw/mem/memory-device.h" +#include "hw/mem/pc-dimm.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/pmem.h" +#include "qemu/range.h" +#include "qemu/rcu.h" +#include "sysemu/hostmem.h" +#include "hw/cxl/cxl.h" + +static void build_dvsecs(CXLType3Dev *ct3d) +{ + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + uint8_t *dvsec; + + dvsec = (uint8_t *)&(CXLDVSECDevice){ + .cap = 0x1e, + .ctrl = 0x2, + .status2 = 0x2, + .range1_size_hi = ct3d->hostmem->size >> 32, + .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | + (ct3d->hostmem->size & 0xF0000000), + .range1_base_hi = 0, + .range1_base_lo = 0, + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + PCIE_CXL_DEVICE_DVSEC_LENGTH, + PCIE_CXL_DEVICE_DVSEC, + PCIE_CXL2_DEVICE_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX, + .reg1_base_hi = 0, + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, dvsec); + dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){ + .phase2_duration = 0x603, /* 3 seconds */ + .phase2_power = 0x33, /* 0x33 miliwatts */ + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + GPF_DEVICE_DVSEC_LENGTH, GPF_PORT_DVSEC, + GPF_DEVICE_DVSEC_REVID, dvsec); +} + +static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) +{ + ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; + uint32_t *cache_mem = cregs->cache_mem_registers; + + assert(which == 0); + + /* TODO: Sanity checks that the decoder is possible */ + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); + + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); +} + +static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + CXLComponentState *cxl_cstate = opaque; + ComponentRegisters *cregs = &cxl_cstate->crb; + CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate); + uint32_t *cache_mem = cregs->cache_mem_registers; + bool should_commit = false; + int which_hdm = -1; + + assert(size == 4); + g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE); + + switch (offset) { + case A_CXL_HDM_DECODER0_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + which_hdm = 0; + break; + default: + break; + } + + stl_le_p((uint8_t *)cache_mem + offset, value); + if (should_commit) { + hdm_decoder_commit(ct3d, which_hdm); + } +} + +static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) +{ + DeviceState *ds = DEVICE(ct3d); + MemoryRegion *mr; + char *name; + + if (!ct3d->hostmem) { + error_setg(errp, "memdev property must be set"); + return false; + } + + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + error_setg(errp, "memdev property must be set"); + return false; + } + memory_region_set_nonvolatile(mr, true); + memory_region_set_enabled(mr, true); + host_memory_backend_set_mapped(ct3d->hostmem, true); + + if (ds->id) { + name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id); + } else { + name = g_strdup("cxl-type3-dpa-space"); + } + address_space_init(&ct3d->hostmem_as, mr, name); + g_free(name); + + ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size; + + if (!ct3d->lsa) { + error_setg(errp, "lsa property must be set"); + return false; + } + + return true; +} + +static void ct3_realize(PCIDevice *pci_dev, Error **errp) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + ComponentRegisters *regs = &cxl_cstate->crb; + MemoryRegion *mr = ®s->component_registers; + uint8_t *pci_conf = pci_dev->config; + + if (!cxl_setup_memory(ct3d, errp)) { + return; + } + + pci_config_set_prog_interface(pci_conf, 0x10); + pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL); + + pcie_endpoint_cap_init(pci_dev, 0x80); + cxl_cstate->dvsec_offset = 0x100; + + ct3d->cxl_cstate.pdev = pci_dev; + build_dvsecs(ct3d); + + regs->special_ops = g_new0(MemoryRegionOps, 1); + regs->special_ops->write = ct3d_reg_write; + + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, + TYPE_CXL_TYPE3); + + pci_register_bar( + pci_dev, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr); + + cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate); + pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, + &ct3d->cxl_dstate.device_registers); +} + +static void ct3_exit(PCIDevice *pci_dev) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + ComponentRegisters *regs = &cxl_cstate->crb; + + g_free(regs->special_ops); + address_space_destroy(&ct3d->hostmem_as); +} + +/* TODO: Support multiple HDM decoders and DPA skip */ +static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) +{ + uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; + uint64_t decoder_base, decoder_size, hpa_offset; + uint32_t hdm0_ctrl; + int ig, iw; + + decoder_base = (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI] << 32) | + cache_mem[R_CXL_HDM_DECODER0_BASE_LO]); + if ((uint64_t)host_addr < decoder_base) { + return false; + } + + hpa_offset = (uint64_t)host_addr - decoder_base; + + decoder_size = ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI] << 32) | + cache_mem[R_CXL_HDM_DECODER0_SIZE_LO]; + if (hpa_offset >= decoder_size) { + return false; + } + + hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; + iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW); + ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG); + + *dpa = (MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) | + ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) >> iw); + + return true; +} + +MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + CXLType3Dev *ct3d = CXL_TYPE3(d); + uint64_t dpa_offset; + MemoryRegion *mr; + + /* TODO support volatile region */ + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + return MEMTX_ERROR; + } + + if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { + return MEMTX_ERROR; + } + + if (dpa_offset > int128_get64(mr->size)) { + return MEMTX_ERROR; + } + + return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size); +} + +MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + CXLType3Dev *ct3d = CXL_TYPE3(d); + uint64_t dpa_offset; + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + return MEMTX_OK; + } + + if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { + return MEMTX_OK; + } + + if (dpa_offset > int128_get64(mr->size)) { + return MEMTX_OK; + } + return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs, + &data, size); +} + +static void ct3d_reset(DeviceState *dev) +{ + CXLType3Dev *ct3d = CXL_TYPE3(dev); + uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers; + uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE); + cxl_device_register_init_common(&ct3d->cxl_dstate); +} + +static Property ct3_props[] = { + DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static uint64_t get_lsa_size(CXLType3Dev *ct3d) +{ + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(ct3d->lsa); + return memory_region_size(mr); +} + +static void validate_lsa_access(MemoryRegion *mr, uint64_t size, + uint64_t offset) +{ + assert(offset + size <= memory_region_size(mr)); + assert(offset + size > offset); +} + +static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size, + uint64_t offset) +{ + MemoryRegion *mr; + void *lsa; + + mr = host_memory_backend_get_memory(ct3d->lsa); + validate_lsa_access(mr, size, offset); + + lsa = memory_region_get_ram_ptr(mr) + offset; + memcpy(buf, lsa, size); + + return size; +} + +static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, + uint64_t offset) +{ + MemoryRegion *mr; + void *lsa; + + mr = host_memory_backend_get_memory(ct3d->lsa); + validate_lsa_access(mr, size, offset); + + lsa = memory_region_get_ram_ptr(mr) + offset; + memcpy(lsa, buf, size); + memory_region_set_dirty(mr, offset, size); + + /* + * Just like the PMEM, if the guest is not allowed to exit gracefully, label + * updates will get lost. + */ +} + +static void ct3_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + CXLType3Class *cvc = CXL_TYPE3_CLASS(oc); + + pc->realize = ct3_realize; + pc->exit = ct3_exit; + pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->vendor_id = PCI_VENDOR_ID_INTEL; + pc->device_id = 0xd93; /* LVF for now */ + pc->revision = 1; + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "CXL PMEM Device (Type 3)"; + dc->reset = ct3d_reset; + device_class_set_props(dc, ct3_props); + + cvc->get_lsa_size = get_lsa_size; + cvc->get_lsa = get_lsa; + cvc->set_lsa = set_lsa; +} + +static const TypeInfo ct3d_info = { + .name = TYPE_CXL_TYPE3, + .parent = TYPE_PCI_DEVICE, + .class_size = sizeof(struct CXLType3Class), + .class_init = ct3_class_init, + .instance_size = sizeof(CXLType3Dev), + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CXL_DEVICE }, + { INTERFACE_PCIE_DEVICE }, + {} + }, +}; + +static void ct3d_registers(void) +{ + type_register_static(&ct3d_info); +} + +type_init(ct3d_registers); diff --git a/hw/mem/meson.build b/hw/mem/meson.build index 82f86d117e..609b2b36fc 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -3,6 +3,7 @@ mem_ss.add(files('memory-device.c')) mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) +mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) diff --git a/hw/meson.build b/hw/meson.build index b3366c888e..9992c5101e 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -6,6 +6,7 @@ subdir('block') subdir('char') subdir('core') subdir('cpu') +subdir('cxl') subdir('display') subdir('dma') subdir('gpio') diff --git a/hw/net/can/ctu_can_fd_frame.h b/hw/net/can/ctu_can_fd_frame.h index 04d956c84e..459c4a0ada 100644 --- a/hw/net/can/ctu_can_fd_frame.h +++ b/hw/net/can/ctu_can_fd_frame.h @@ -29,8 +29,8 @@ /* This file is autogenerated, DO NOT EDIT! */ -#ifndef __CTU_CAN_FD_CAN_FD_FRAME_FORMAT__ -#define __CTU_CAN_FD_CAN_FD_FRAME_FORMAT__ +#ifndef HW_CAN_CTU_CAN_FD_FRAME_H +#define HW_CAN_CTU_CAN_FD_FRAME_H /* CAN_Frame_format memory map */ enum ctu_can_fd_can_frame_format { diff --git a/hw/net/can/ctu_can_fd_regs.h b/hw/net/can/ctu_can_fd_regs.h index 450f4b9fb3..57859b87bc 100644 --- a/hw/net/can/ctu_can_fd_regs.h +++ b/hw/net/can/ctu_can_fd_regs.h @@ -29,8 +29,8 @@ /* This file is autogenerated, DO NOT EDIT! */ -#ifndef __CTU_CAN_FD_CAN_FD_REGISTER_MAP__ -#define __CTU_CAN_FD_CAN_FD_REGISTER_MAP__ +#ifndef HW_CAN_CTU_CAN_FD_REGS_H +#define HW_CAN_CTU_CAN_FD_REGS_H /* CAN_Registers memory map */ enum ctu_can_fd_can_registers { diff --git a/hw/net/tulip.c b/hw/net/tulip.c index d5b6cc5ee6..097e905bec 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -967,6 +967,8 @@ static void pci_tulip_realize(PCIDevice *pci_dev, Error **errp) pci_conf = s->dev.config; pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ + qemu_macaddr_default_if_unset(&s->c.macaddr); + s->eeprom = eeprom93xx_new(&pci_dev->qdev, 64); tulip_fill_eeprom(s); @@ -981,8 +983,6 @@ static void pci_tulip_realize(PCIDevice *pci_dev, Error **errp) s->irq = pci_allocate_irq(&s->dev); - qemu_macaddr_default_if_unset(&s->c.macaddr); - s->nic = qemu_new_nic(&net_tulip_info, &s->c, object_get_typename(OBJECT(pci_dev)), pci_dev->qdev.id, s); diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 30379d2ca4..ccac5b7a64 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -201,7 +201,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); } if (~net->dev.features & net->dev.backend_features) { - fprintf(stderr, "vhost lacks feature mask %" PRIu64 + fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 " for backend\n", (uint64_t)(~net->dev.features & net->dev.backend_features)); goto fail; @@ -213,7 +213,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { features = vhost_user_get_acked_features(net->nc); if (~net->dev.features & features) { - fprintf(stderr, "vhost lacks feature mask %" PRIu64 + fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 " for backend\n", (uint64_t)(~net->dev.features & features)); goto fail; @@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, r = vhost_set_vring_enable(peer, peer->vring_enable); if (r < 0) { + vhost_net_stop_one(get_vhost_net(peer), dev); goto err_start; } } @@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, err_start: while (--i >= 0) { - peer = qemu_get_peer(ncs , i); + peer = qemu_get_peer(ncs, i < data_queue_pairs ? + i : n->max_queue_pairs); vhost_net_stop_one(get_vhost_net(peer), dev); } e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 1067e72b39..7ad948ee7c 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qemu/atomic.h" #include "qemu/iov.h" +#include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "hw/virtio/virtio.h" @@ -245,7 +246,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) VirtIODevice *vdev = VIRTIO_DEVICE(n); NetClientState *nc = qemu_get_queue(n->nic); int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; - int cvq = n->max_ncs - n->max_queue_pairs; + int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? + n->max_ncs - n->max_queue_pairs : 0; if (!get_vhost_net(nc->peer)) { return; @@ -1379,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, { VirtIODevice *vdev = VIRTIO_DEVICE(n); uint16_t queue_pairs; + NetClientState *nc = qemu_get_queue(n->nic); virtio_net_disable_rss(n); if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { @@ -1410,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, return VIRTIO_NET_ERR; } + /* Avoid changing the number of queue_pairs for vdpa device in + * userspace handler. A future fix is needed to handle the mq + * change in userspace handler with vhost-vdpa. Let's disable + * the mq handling from userspace for now and only allow get + * done through the kernel. Ripples may be seen when falling + * back to userspace, but without doing it qemu process would + * crash on a recursive entry to virtio_net_set_status(). + */ + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + return VIRTIO_NET_ERR; + } + n->curr_queue_pairs = queue_pairs; /* stop the backend before changing the number of queue_pairs to avoid handling a * disabled queue */ @@ -1443,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) } iov_cnt = elem->out_num; - iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num); + iov2 = iov = g_memdup2(elem->out_sg, + sizeof(struct iovec) * elem->out_num); s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); if (s != sizeof(ctrl)) { @@ -3170,8 +3186,22 @@ static NetClientInfo net_virtio_info = { static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) { VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); + NetClientState *nc; assert(n->vhost_started); + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { + /* Must guard against invalid features and bogus queue index + * from being set by malicious guest, or penetrated through + * buggy migration stream. + */ + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: bogus vq index ignored\n", __func__); + return false; + } + nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); } @@ -3179,8 +3209,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) { VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); + NetClientState *nc; assert(n->vhost_started); + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { + /* Must guard against invalid features and bogus queue index + * from being set by malicious guest, or penetrated through + * buggy migration stream. + */ + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: bogus vq index ignored\n", __func__); + return; + } + nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); } @@ -3392,7 +3436,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) } virtio_net_set_config_size(n, n->host_features); - virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); + virtio_init(vdev, VIRTIO_ID_NET, n->config_size); /* * We set a lower limit on RX queue size to what it always was. @@ -3619,6 +3663,14 @@ static bool dev_unplug_pending(void *opaque) return vdc->primary_unplug_pending(dev); } +static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_queue(n->nic); + struct vhost_net *net = get_vhost_net(nc->peer); + return &net->dev; +} + static const VMStateDescription vmstate_virtio_net = { .name = "virtio-net", .minimum_version_id = VIRTIO_NET_VM_VERSION, @@ -3721,6 +3773,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) vdc->post_load = virtio_net_post_load_virtio; vdc->vmsd = &vmstate_virtio_net_device; vdc->primary_unplug_pending = primary_unplug_pending; + vdc->get_vhost = virtio_net_get_vhost; } static const TypeInfo virtio_net_info = { diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 739c8b8f79..6773819325 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -15,8 +15,8 @@ * This code is licensed under the GNU GPL v2 or later. */ -#ifndef HW_NVME_INTERNAL_H -#define HW_NVME_INTERNAL_H +#ifndef HW_NVME_NVME_H +#define HW_NVME_NVME_H #include "qemu/uuid.h" #include "hw/pci/pci.h" @@ -519,4 +519,4 @@ void nvme_rw_complete_cb(void *opaque, int ret); uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, NvmeCmd *cmd); -#endif /* HW_NVME_INTERNAL_H */ +#endif /* HW_NVME_NVME_H */ diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 4125cbebcd..d605f3f45a 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -178,21 +178,13 @@ error: static void fw_cfg_bootsplash(FWCfgState *s) { - const char *boot_splash_filename = NULL; - const char *boot_splash_time = NULL; char *filename, *file_data; gsize file_size; int file_type; - /* get user configuration */ - QemuOptsList *plist = qemu_find_opts("boot-opts"); - QemuOpts *opts = QTAILQ_FIRST(&plist->head); - boot_splash_filename = qemu_opt_get(opts, "splash"); - boot_splash_time = qemu_opt_get(opts, "splash-time"); - /* insert splash time if user configurated */ - if (boot_splash_time) { - int64_t bst_val = qemu_opt_get_number(opts, "splash-time", -1); + if (current_machine->boot_config.has_splash_time) { + int64_t bst_val = current_machine->boot_config.splash_time; uint16_t bst_le16; /* validate the input */ @@ -208,7 +200,8 @@ static void fw_cfg_bootsplash(FWCfgState *s) } /* insert splash file if user configurated */ - if (boot_splash_filename) { + if (current_machine->boot_config.has_splash) { + const char *boot_splash_filename = current_machine->boot_config.splash; filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, boot_splash_filename); if (filename == NULL) { error_report("failed to find file '%s'", boot_splash_filename); @@ -238,17 +231,11 @@ static void fw_cfg_bootsplash(FWCfgState *s) static void fw_cfg_reboot(FWCfgState *s) { - const char *reboot_timeout = NULL; uint64_t rt_val = -1; uint32_t rt_le32; - /* get user configuration */ - QemuOptsList *plist = qemu_find_opts("boot-opts"); - QemuOpts *opts = QTAILQ_FIRST(&plist->head); - reboot_timeout = qemu_opt_get(opts, "reboot-timeout"); - - if (reboot_timeout) { - rt_val = qemu_opt_get_number(opts, "reboot-timeout", -1); + if (current_machine->boot_config.has_reboot_timeout) { + rt_val = current_machine->boot_config.reboot_timeout; /* validate the input */ if (rt_val > 0xffff && rt_val != (uint64_t)-1) { @@ -1133,7 +1120,7 @@ static void fw_cfg_common_realize(DeviceState *dev, Error **errp) fw_cfg_add_bytes(s, FW_CFG_SIGNATURE, (char *)"QEMU", 4); fw_cfg_add_bytes(s, FW_CFG_UUID, &qemu_uuid, 16); fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)!machine->enable_graphics); - fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu); + fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)(machine->boot_config.has_menu && machine->boot_config.menu)); fw_cfg_bootsplash(s); fw_cfg_reboot(s); diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c index 8184caa60b..35adce17ac 100644 --- a/hw/openrisc/openrisc_sim.c +++ b/hw/openrisc/openrisc_sim.c @@ -71,6 +71,10 @@ enum { OR1KSIM_ETHOC_IRQ = 4, }; +enum { + OR1KSIM_UART_COUNT = 4 +}; + static const struct MemmapEntry { hwaddr base; hwaddr size; @@ -78,7 +82,7 @@ static const struct MemmapEntry { [OR1KSIM_DRAM] = { 0x00000000, 0 }, [OR1KSIM_UART] = { 0x90000000, 0x100 }, [OR1KSIM_ETHOC] = { 0x92000000, 0x800 }, - [OR1KSIM_OMPIC] = { 0x98000000, 16 }, + [OR1KSIM_OMPIC] = { 0x98000000, OR1KSIM_CPUS_MAX * 8 }, }; static struct openrisc_boot_info { @@ -239,11 +243,13 @@ static void openrisc_sim_ompic_init(Or1ksimState *state, hwaddr base, static void openrisc_sim_serial_init(Or1ksimState *state, hwaddr base, hwaddr size, int num_cpus, - OpenRISCCPU *cpus[], int irq_pin) + OpenRISCCPU *cpus[], int irq_pin, + int uart_idx) { void *fdt = state->fdt; char *nodename; qemu_irq serial_irq; + char alias[sizeof("uart0")]; int i; if (num_cpus > 1) { @@ -258,7 +264,8 @@ static void openrisc_sim_serial_init(Or1ksimState *state, hwaddr base, serial_irq = get_cpu_irq(cpus, 0, irq_pin); } serial_mm_init(get_system_memory(), base, 0, serial_irq, 115200, - serial_hd(0), DEVICE_NATIVE_ENDIAN); + serial_hd(OR1KSIM_UART_COUNT - uart_idx - 1), + DEVICE_NATIVE_ENDIAN); /* Add device tree node for serial. */ nodename = g_strdup_printf("/serial@%" HWADDR_PRIx, base); @@ -271,7 +278,8 @@ static void openrisc_sim_serial_init(Or1ksimState *state, hwaddr base, /* The /chosen node is created during fdt creation. */ qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", nodename); - qemu_fdt_setprop_string(fdt, "/aliases", "uart0", nodename); + snprintf(alias, sizeof(alias), "uart%d", uart_idx); + qemu_fdt_setprop_string(fdt, "/aliases", alias, nodename); g_free(nodename); } @@ -356,7 +364,7 @@ static uint32_t openrisc_load_fdt(Or1ksimState *state, hwaddr load_start, } /* We put fdt right after the kernel and/or initrd. */ - fdt_addr = ROUND_UP(load_start, 4); + fdt_addr = TARGET_PAGE_ALIGN(load_start); ret = fdt_pack(fdt); /* Should only fail if we've built a corrupted tree */ @@ -410,13 +418,15 @@ static void openrisc_sim_init(MachineState *machine) if (smp_cpus > 1) { openrisc_sim_ompic_init(state, or1ksim_memmap[OR1KSIM_OMPIC].base, - or1ksim_memmap[OR1KSIM_UART].size, + or1ksim_memmap[OR1KSIM_OMPIC].size, smp_cpus, cpus, OR1KSIM_OMPIC_IRQ); } - openrisc_sim_serial_init(state, or1ksim_memmap[OR1KSIM_UART].base, - or1ksim_memmap[OR1KSIM_UART].size, smp_cpus, cpus, - OR1KSIM_UART_IRQ); + for (n = 0; n < OR1KSIM_UART_COUNT; ++n) + openrisc_sim_serial_init(state, or1ksim_memmap[OR1KSIM_UART].base + + or1ksim_memmap[OR1KSIM_UART].size * n, + or1ksim_memmap[OR1KSIM_UART].size, + smp_cpus, cpus, OR1KSIM_UART_IRQ, n); load_addr = openrisc_load_kernel(ram_size, kernel_filename); if (load_addr > 0) { diff --git a/hw/pci-bridge/Kconfig b/hw/pci-bridge/Kconfig index f8df4315ba..02614f49aa 100644 --- a/hw/pci-bridge/Kconfig +++ b/hw/pci-bridge/Kconfig @@ -27,3 +27,8 @@ config DEC_PCI config SIMBA bool + +config CXL + bool + default y if PCI_EXPRESS && PXB + depends on PCI_EXPRESS && MSI_NONBROKEN && PXB diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c new file mode 100644 index 0000000000..fb213fa06e --- /dev/null +++ b/hw/pci-bridge/cxl_root_port.c @@ -0,0 +1,236 @@ +/* + * CXL 2.0 Root Port Implementation + * + * Copyright(C) 2020 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/range.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pcie_port.h" +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "qapi/error.h" +#include "hw/cxl/cxl.h" + +#define CXL_ROOT_PORT_DID 0x7075 + +/* Copied from the gen root port which we derive */ +#define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100 +#define GEN_PCIE_ROOT_PORT_ACS_OFFSET \ + (GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF) +#define CXL_ROOT_PORT_DVSEC_OFFSET \ + (GEN_PCIE_ROOT_PORT_ACS_OFFSET + PCI_ACS_SIZEOF) + +typedef struct CXLRootPort { + /*< private >*/ + PCIESlot parent_obj; + + CXLComponentState cxl_cstate; + PCIResReserve res_reserve; +} CXLRootPort; + +#define TYPE_CXL_ROOT_PORT "cxl-rp" +DECLARE_INSTANCE_CHECKER(CXLRootPort, CXL_ROOT_PORT, TYPE_CXL_ROOT_PORT) + +static void latch_registers(CXLRootPort *crp) +{ + uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers; + uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT); +} + +static void build_dvsecs(CXLComponentState *cxl) +{ + uint8_t *dvsec; + + dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + EXTENSIONS_PORT_DVSEC_LENGTH, + EXTENSIONS_PORT_DVSEC, + EXTENSIONS_PORT_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECPortGPF){ + .rsvd = 0, + .phase1_ctrl = 1, /* 1μs timeout */ + .phase2_ctrl = 1, /* 1μs timeout */ + }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC, + GPF_PORT_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){ + .cap = 0x26, /* IO, Mem, non-MLD */ + .ctrl = 0x2, + .status = 0x26, /* same */ + .rcvd_mod_ts_data_phase1 = 0xef, + }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_FLEXBUS_PORT_DVSEC, + PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, dvsec); +} + +static void cxl_rp_realize(DeviceState *dev, Error **errp) +{ + PCIDevice *pci_dev = PCI_DEVICE(dev); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + CXLRootPort *crp = CXL_ROOT_PORT(dev); + CXLComponentState *cxl_cstate = &crp->cxl_cstate; + ComponentRegisters *cregs = &cxl_cstate->crb; + MemoryRegion *component_bar = &cregs->component_registers; + Error *local_err = NULL; + + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + int rc = + pci_bridge_qemu_reserve_cap_init(pci_dev, 0, crp->res_reserve, errp); + if (rc < 0) { + rpc->parent_class.exit(pci_dev); + return; + } + + if (!crp->res_reserve.io || crp->res_reserve.io == -1) { + pci_word_test_and_clear_mask(pci_dev->wmask + PCI_COMMAND, + PCI_COMMAND_IO); + pci_dev->wmask[PCI_IO_BASE] = 0; + pci_dev->wmask[PCI_IO_LIMIT] = 0; + } + + cxl_cstate->dvsec_offset = CXL_ROOT_PORT_DVSEC_OFFSET; + cxl_cstate->pdev = pci_dev; + build_dvsecs(&crp->cxl_cstate); + + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, + TYPE_CXL_ROOT_PORT); + + pci_register_bar(pci_dev, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, + component_bar); +} + +static void cxl_rp_reset(DeviceState *dev) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + CXLRootPort *crp = CXL_ROOT_PORT(dev); + + rpc->parent_reset(dev); + + latch_registers(crp); +} + +static Property gen_rp_props[] = { + DEFINE_PROP_UINT32("bus-reserve", CXLRootPort, res_reserve.bus, -1), + DEFINE_PROP_SIZE("io-reserve", CXLRootPort, res_reserve.io, -1), + DEFINE_PROP_SIZE("mem-reserve", CXLRootPort, res_reserve.mem_non_pref, -1), + DEFINE_PROP_SIZE("pref32-reserve", CXLRootPort, res_reserve.mem_pref_32, + -1), + DEFINE_PROP_SIZE("pref64-reserve", CXLRootPort, res_reserve.mem_pref_64, + -1), + DEFINE_PROP_END_OF_LIST() +}; + +static void cxl_rp_dvsec_write_config(PCIDevice *dev, uint32_t addr, + uint32_t val, int len) +{ + CXLRootPort *crp = CXL_ROOT_PORT(dev); + + if (range_contains(&crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC], addr)) { + uint8_t *reg = &dev->config[addr]; + addr -= crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC].lob; + if (addr == PORT_CONTROL_OFFSET) { + if (pci_get_word(reg) & PORT_CONTROL_UNMASK_SBR) { + /* unmask SBR */ + qemu_log_mask(LOG_UNIMP, "SBR mask control is not supported\n"); + } + if (pci_get_word(reg) & PORT_CONTROL_ALT_MEMID_EN) { + /* Alt Memory & ID Space Enable */ + qemu_log_mask(LOG_UNIMP, + "Alt Memory & ID space is not supported\n"); + } + } + } +} + +static void cxl_rp_write_config(PCIDevice *d, uint32_t address, uint32_t val, + int len) +{ + uint16_t slt_ctl, slt_sta; + + pcie_cap_slot_get(d, &slt_ctl, &slt_sta); + pci_bridge_write_config(d, address, val, len); + pcie_cap_flr_write_config(d, address, val, len); + pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); + pcie_aer_write_config(d, address, val, len); + + cxl_rp_dvsec_write_config(d, address, val, len); +} + +static void cxl_root_port_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *k = PCI_DEVICE_CLASS(oc); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(oc); + + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->device_id = CXL_ROOT_PORT_DID; + dc->desc = "CXL Root Port"; + k->revision = 0; + device_class_set_props(dc, gen_rp_props); + k->config_write = cxl_rp_write_config; + + device_class_set_parent_realize(dc, cxl_rp_realize, &rpc->parent_realize); + device_class_set_parent_reset(dc, cxl_rp_reset, &rpc->parent_reset); + + rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET; + rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET; + + dc->hotpluggable = false; +} + +static const TypeInfo cxl_root_port_info = { + .name = TYPE_CXL_ROOT_PORT, + .parent = TYPE_PCIE_ROOT_PORT, + .instance_size = sizeof(CXLRootPort), + .class_init = cxl_root_port_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CXL_DEVICE }, + { } + }, +}; + +static void cxl_register(void) +{ + type_register_static(&cxl_root_port_info); +} + +type_init(cxl_register); diff --git a/hw/pci-bridge/meson.build b/hw/pci-bridge/meson.build index daab8acf2a..b6d26a03d5 100644 --- a/hw/pci-bridge/meson.build +++ b/hw/pci-bridge/meson.build @@ -5,6 +5,7 @@ pci_ss.add(when: 'CONFIG_IOH3420', if_true: files('ioh3420.c')) pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pcie_root_port.c', 'pcie_pci_bridge.c')) pci_ss.add(when: 'CONFIG_PXB', if_true: files('pci_expander_bridge.c')) pci_ss.add(when: 'CONFIG_XIO3130', if_true: files('xio3130_upstream.c', 'xio3130_downstream.c')) +pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c')) # NewWorld PowerMac pci_ss.add(when: 'CONFIG_DEC_PCI', if_true: files('dec.c')) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index de932286b5..69244decdb 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -17,6 +17,7 @@ #include "hw/pci/pci_host.h" #include "hw/qdev-properties.h" #include "hw/pci/pci_bridge.h" +#include "hw/cxl/cxl.h" #include "qemu/range.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -24,6 +25,8 @@ #include "hw/boards.h" #include "qom/object.h" +enum BusType { PCI, PCIE, CXL }; + #define TYPE_PXB_BUS "pxb-bus" typedef struct PXBBus PXBBus; DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS, @@ -33,6 +36,10 @@ DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS, DECLARE_INSTANCE_CHECKER(PXBBus, PXB_PCIE_BUS, TYPE_PXB_PCIE_BUS) +#define TYPE_PXB_CXL_BUS "pxb-cxl-bus" +DECLARE_INSTANCE_CHECKER(PXBBus, PXB_CXL_BUS, + TYPE_PXB_CXL_BUS) + struct PXBBus { /*< private >*/ PCIBus parent_obj; @@ -50,18 +57,13 @@ DECLARE_INSTANCE_CHECKER(PXBDev, PXB_DEV, DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV, TYPE_PXB_PCIE_DEVICE) -struct PXBDev { - /*< private >*/ - PCIDevice parent_obj; - /*< public >*/ - - uint8_t bus_nr; - uint16_t numa_node; - bool bypass_iommu; -}; - static PXBDev *convert_to_pxb(PCIDevice *dev) { + /* A CXL PXB's parent bus is PCIe, so the normal check won't work */ + if (object_dynamic_cast(OBJECT(dev), TYPE_PXB_CXL_DEVICE)) { + return PXB_CXL_DEV(dev); + } + return pci_bus_is_express(pci_get_bus(dev)) ? PXB_PCIE_DEV(dev) : PXB_DEV(dev); } @@ -70,6 +72,13 @@ static GList *pxb_dev_list; #define TYPE_PXB_HOST "pxb-host" +CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb) +{ + CXLHost *host = PXB_CXL_HOST(hb); + + return &host->cxl_cstate; +} + static int pxb_bus_num(PCIBus *bus) { PXBDev *pxb = convert_to_pxb(bus->parent_dev); @@ -106,11 +115,20 @@ static const TypeInfo pxb_pcie_bus_info = { .class_init = pxb_bus_class_init, }; +static const TypeInfo pxb_cxl_bus_info = { + .name = TYPE_PXB_CXL_BUS, + .parent = TYPE_CXL_BUS, + .instance_size = sizeof(PXBBus), + .class_init = pxb_bus_class_init, +}; + static const char *pxb_host_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) { - PXBBus *bus = pci_bus_is_express(rootbus) ? - PXB_PCIE_BUS(rootbus) : PXB_BUS(rootbus); + PXBBus *bus = pci_bus_is_cxl(rootbus) ? + PXB_CXL_BUS(rootbus) : + pci_bus_is_express(rootbus) ? PXB_PCIE_BUS(rootbus) : + PXB_BUS(rootbus); snprintf(bus->bus_path, 8, "0000:%02x", pxb_bus_num(rootbus)); return bus->bus_path; @@ -166,6 +184,52 @@ static const TypeInfo pxb_host_info = { .class_init = pxb_host_class_init, }; +static void pxb_cxl_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + CXLHost *cxl = PXB_CXL_HOST(dev); + CXLComponentState *cxl_cstate = &cxl->cxl_cstate; + struct MemoryRegion *mr = &cxl_cstate->crb.component_registers; + hwaddr offset; + + cxl_component_register_block_init(OBJECT(dev), cxl_cstate, + TYPE_PXB_CXL_HOST); + sysbus_init_mmio(sbd, mr); + + offset = memory_region_size(mr) * ms->cxl_devices_state->next_mr_idx; + if (offset > memory_region_size(&ms->cxl_devices_state->host_mr)) { + error_setg(errp, "Insufficient space for pxb cxl host register space"); + return; + } + + memory_region_add_subregion(&ms->cxl_devices_state->host_mr, offset, mr); + ms->cxl_devices_state->next_mr_idx++; +} + +static void pxb_cxl_host_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class); + + hc->root_bus_path = pxb_host_root_bus_path; + dc->fw_name = "cxl"; + dc->realize = pxb_cxl_realize; + /* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */ + dc->user_creatable = false; +} + +/* + * This is a device to handle the MMIO for a CXL host bridge. It does nothing + * else. + */ +static const TypeInfo cxl_host_info = { + .name = TYPE_PXB_CXL_HOST, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(CXLHost), + .class_init = pxb_cxl_host_class_init, +}; + /* * Registers the PXB bus as a child of pci host root bus. */ @@ -212,6 +276,17 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin) return pin - PCI_SLOT(pxb->devfn); } +static void pxb_dev_reset(DeviceState *dev) +{ + CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge; + CXLComponentState *cxl_cstate = &cxl->cxl_cstate; + uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers; + uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8); +} + static gint pxb_compare(gconstpointer a, gconstpointer b) { const PXBDev *pxb_a = a, *pxb_b = b; @@ -221,7 +296,8 @@ static gint pxb_compare(gconstpointer a, gconstpointer b) 0; } -static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) +static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type, + Error **errp) { PXBDev *pxb = convert_to_pxb(dev); DeviceState *ds, *bds = NULL; @@ -245,9 +321,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) dev_name = dev->qdev.id; } - ds = qdev_new(TYPE_PXB_HOST); - if (pcie) { + ds = qdev_new(type == CXL ? TYPE_PXB_CXL_HOST : TYPE_PXB_HOST); + if (type == PCIE) { bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS); + } else if (type == CXL) { + bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS); + bus->flags |= PCI_BUS_CXL; + PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds); } else { bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS); bds = qdev_new("pci-bridge"); @@ -295,7 +375,7 @@ static void pxb_dev_realize(PCIDevice *dev, Error **errp) return; } - pxb_dev_realize_common(dev, false, errp); + pxb_dev_realize_common(dev, PCI, errp); } static void pxb_dev_exitfn(PCIDevice *pci_dev) @@ -348,7 +428,7 @@ static void pxb_pcie_dev_realize(PCIDevice *dev, Error **errp) return; } - pxb_dev_realize_common(dev, true, errp); + pxb_dev_realize_common(dev, PCIE, errp); } static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data) @@ -379,13 +459,67 @@ static const TypeInfo pxb_pcie_dev_info = { }, }; +static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + + /* A CXL PXB's parent bus is still PCIe */ + if (!pci_bus_is_express(pci_get_bus(dev))) { + error_setg(errp, "pxb-cxl devices cannot reside on a PCI bus"); + return; + } + if (!ms->cxl_devices_state->is_enabled) { + error_setg(errp, "Machine does not have cxl=on"); + return; + } + + pxb_dev_realize_common(dev, CXL, errp); + pxb_dev_reset(DEVICE(dev)); +} + +static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = pxb_cxl_dev_realize; + k->exit = pxb_dev_exitfn; + /* + * XXX: These types of bridges don't actually show up in the hierarchy so + * vendor, device, class, etc. ids are intentionally left out. + */ + + dc->desc = "CXL Host Bridge"; + device_class_set_props(dc, pxb_dev_properties); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + + /* Host bridges aren't hotpluggable. FIXME: spec reference */ + dc->hotpluggable = false; + dc->reset = pxb_dev_reset; +} + +static const TypeInfo pxb_cxl_dev_info = { + .name = TYPE_PXB_CXL_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PXBDev), + .class_init = pxb_cxl_dev_class_init, + .interfaces = + (InterfaceInfo[]){ + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + {}, + }, +}; + static void pxb_register_types(void) { type_register_static(&pxb_bus_info); type_register_static(&pxb_pcie_bus_info); + type_register_static(&pxb_cxl_bus_info); type_register_static(&pxb_host_info); + type_register_static(&cxl_host_info); type_register_static(&pxb_dev_info); type_register_static(&pxb_pcie_dev_info); + type_register_static(&pxb_cxl_dev_info); } type_init(pxb_register_types) diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c index f1cfe9d14a..460e48269d 100644 --- a/hw/pci-bridge/pcie_root_port.c +++ b/hw/pci-bridge/pcie_root_port.c @@ -67,7 +67,11 @@ static void rp_realize(PCIDevice *d, Error **errp) int rc; pci_config_set_interrupt_pin(d->config, 1); - pci_bridge_initfn(d, TYPE_PCIE_BUS); + if (d->cap_present & QEMU_PCIE_CAP_CXL) { + pci_bridge_initfn(d, TYPE_CXL_BUS); + } else { + pci_bridge_initfn(d, TYPE_PCIE_BUS); + } pcie_port_init_reg(d); rc = pci_bridge_ssvid_init(d, rpc->ssvid_offset, dc->vendor_id, diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index e7e162a00a..7c7316bc96 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -5,6 +5,7 @@ #include "hw/pci/pci_bus.h" #include "hw/pci/pci_bridge.h" #include "hw/pci/pcie_host.h" +#include "hw/acpi/cxl.h" static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) { @@ -139,6 +140,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) QLIST_FOREACH(bus, &bus->child, sibling) { uint8_t bus_num = pci_bus_num(bus); uint8_t numa_node = pci_bus_numa_node(bus); + bool is_cxl = pci_bus_is_cxl(bus); if (!pci_bus_is_root(bus)) { continue; @@ -154,8 +156,16 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) } dev = aml_device("PC%.02X", bus_num); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08"))); - aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03"))); + if (is_cxl) { + struct Aml *pkg = aml_package(2); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016"))); + aml_append(pkg, aml_eisaid("PNP0A08")); + aml_append(pkg, aml_eisaid("PNP0A03")); + aml_append(dev, aml_name_decl("_CID", pkg)); + } else { + aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08"))); + aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03"))); + } aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); aml_append(dev, aml_name_decl("_STR", aml_unicode("pxb Device"))); @@ -175,7 +185,11 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) cfg->pio.base, 0, 0, 0); aml_append(dev, aml_name_decl("_CRS", crs)); - acpi_dsdt_add_pci_osc(dev); + if (is_cxl) { + build_cxl_osc_method(dev); + } else { + acpi_dsdt_add_pci_osc(dev); + } aml_append(scope, dev); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 9c58f02853..a9b37f8000 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -200,6 +200,11 @@ static const TypeInfo pci_bus_info = { .class_init = pci_bus_class_init, }; +static const TypeInfo cxl_interface_info = { + .name = INTERFACE_CXL_DEVICE, + .parent = TYPE_INTERFACE, +}; + static const TypeInfo pcie_interface_info = { .name = INTERFACE_PCIE_DEVICE, .parent = TYPE_INTERFACE, @@ -223,6 +228,12 @@ static const TypeInfo pcie_bus_info = { .class_init = pcie_bus_class_init, }; +static const TypeInfo cxl_bus_info = { + .name = TYPE_CXL_BUS, + .parent = TYPE_PCIE_BUS, + .class_init = pcie_bus_class_init, +}; + static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num); static void pci_update_mappings(PCIDevice *d); static void pci_irq_handler(void *opaque, int irq_num, int level); @@ -2182,6 +2193,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } + if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) { + pci_dev->cap_present |= QEMU_PCIE_CAP_CXL; + } + pci_dev = do_pci_register_device(pci_dev, object_get_typename(OBJECT(qdev)), pci_dev->devfn, errp); @@ -2747,7 +2762,9 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE); ObjectClass *pcie = object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE); - assert(conventional || pcie); + ObjectClass *cxl = + object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE); + assert(conventional || pcie || cxl); } } @@ -2937,7 +2954,9 @@ static void pci_register_types(void) { type_register_static(&pci_bus_info); type_register_static(&pcie_bus_info); + type_register_static(&cxl_bus_info); type_register_static(&conventional_pci_interface_info); + type_register_static(&cxl_interface_info); type_register_static(&pcie_interface_info); type_register_static(&pci_device_type_info); } diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index e95c1e5519..687e4e763a 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -136,6 +136,31 @@ static void pcie_port_class_init(ObjectClass *oc, void *data) device_class_set_props(dc, pcie_port_props); } +PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn) +{ + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + PCIEPort *port; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + + if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + continue; + } + + port = PCIE_PORT(d); + if (port->port == pn) { + return d; + } + } + + return NULL; +} + static const TypeInfo pcie_port_type_info = { .name = TYPE_PCIE_PORT, .parent = TYPE_PCI_BRIDGE, diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index e8ef1a9e5d..c865921bdc 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -111,7 +111,7 @@ static void ppc_core99_init(MachineState *machine) const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; - const char *boot_device = machine->boot_order; + const char *boot_device = machine->boot_config.order; Core99MachineState *core99_machine = CORE99_MACHINE(machine); PowerPCCPU *cpu = NULL; CPUPPCState *env = NULL; diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index fe2adb057b..d62fdf0db3 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -82,7 +82,7 @@ static void ppc_heathrow_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; const char *bios_name = machine->firmware ?: PROM_FILENAME; - const char *boot_device = machine->boot_order; + const char *boot_device = machine->boot_config.order; PowerPCCPU *cpu = NULL; CPUPPCState *env = NULL; char *filename; diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index bf622aa38f..a1cd4505cc 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -381,7 +381,7 @@ static void ibm_40p_init(MachineState *machine) } boot_device = 'm'; } else { - boot_device = machine->boot_order[0]; + boot_device = machine->boot_config.order[0]; } fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8bbae68e1b..6de800524a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1044,8 +1044,8 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0)); } } - if (boot_menu) { - _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu))); + if (machine->boot_config.has_menu && machine->boot_config.menu) { + _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", true))); } _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width)); _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height)); diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c index 2a4aa651ca..9bd98e8219 100644 --- a/hw/remote/mpqemu-link.c +++ b/hw/remote/mpqemu-link.c @@ -68,7 +68,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) } if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), - fds, nfds, errp)) { + fds, nfds, 0, errp)) { ret = true; } else { trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 4b5eb77afd..8612684d48 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -290,13 +290,10 @@ static Property s390_ipl_properties[] = { static void s390_ipl_set_boot_menu(S390IPLState *ipl) { - QemuOptsList *plist = qemu_find_opts("boot-opts"); - QemuOpts *opts = QTAILQ_FIRST(&plist->head); - const char *tmp; unsigned long splash_time = 0; if (!get_boot_device(0)) { - if (boot_menu) { + if (current_machine->boot_config.has_menu && current_machine->boot_config.menu) { error_report("boot menu requires a bootindex to be specified for " "the IPL device"); } @@ -306,7 +303,7 @@ static void s390_ipl_set_boot_menu(S390IPLState *ipl) switch (ipl->iplb.pbt) { case S390_IPL_TYPE_CCW: /* In the absence of -boot menu, use zipl parameters */ - if (!qemu_opt_get(opts, "menu")) { + if (!current_machine->boot_config.has_menu) { ipl->qipl.qipl_flags |= QIPL_FLAG_BM_OPTS_ZIPL; return; } @@ -314,26 +311,21 @@ static void s390_ipl_set_boot_menu(S390IPLState *ipl) case S390_IPL_TYPE_QEMU_SCSI: break; default: - if (boot_menu) { + if (current_machine->boot_config.has_menu && current_machine->boot_config.menu) { error_report("boot menu is not supported for this device type"); } return; } - if (!boot_menu) { + if (!current_machine->boot_config.has_menu || !current_machine->boot_config.menu) { return; } ipl->qipl.qipl_flags |= QIPL_FLAG_BM_OPTS_CMD; - tmp = qemu_opt_get(opts, "splash-time"); - - if (tmp && qemu_strtoul(tmp, NULL, 10, &splash_time)) { - error_report("splash-time is invalid, forcing it to 0"); - ipl->qipl.boot_menu_timeout = 0; - return; + if (current_machine->boot_config.has_splash_time) { + splash_time = current_machine->boot_config.splash_time; } - if (splash_time > 0xffffffff) { error_report("splash-time is too large, forcing it to max value"); ipl->qipl.boot_menu_timeout = 0xffffffff; diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 778f43e4c1..3059068175 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -273,6 +273,13 @@ static void vhost_scsi_unrealize(DeviceState *dev) virtio_scsi_common_unrealize(dev); } +static struct vhost_dev *vhost_scsi_get_vhost(VirtIODevice *vdev) +{ + VHostSCSI *s = VHOST_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + return &vsc->dev; +} + static Property vhost_scsi_properties[] = { DEFINE_PROP_STRING("vhostfd", VirtIOSCSICommon, conf.vhostfd), DEFINE_PROP_STRING("wwpn", VirtIOSCSICommon, conf.wwpn), @@ -307,6 +314,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_scsi_common_get_features; vdc->set_config = vhost_scsi_common_set_config; vdc->set_status = vhost_scsi_set_status; + vdc->get_vhost = vhost_scsi_get_vhost; fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path; } diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 1b2f7eed98..9be21d07ee 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -121,6 +121,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) vsc->dev.backend_features = 0; vqs = vsc->dev.vqs; + s->vhost_user.supports_config = true; ret = vhost_dev_init(&vsc->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index db54d104be..4141dddd51 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1013,8 +1013,7 @@ void virtio_scsi_common_realize(DeviceState *dev, VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev); int i; - virtio_init(vdev, "virtio-scsi", VIRTIO_ID_SCSI, - sizeof(VirtIOSCSIConfig)); + virtio_init(vdev, VIRTIO_ID_SCSI, sizeof(VirtIOSCSIConfig)); if (s->conf.num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { s->conf.num_queues = 1; diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index b693eea0e0..d9288326d6 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -831,8 +831,7 @@ static void sun4m_hw_init(MachineState *machine) SysBusDevice *s; unsigned int smp_cpus = machine->smp.cpus; unsigned int max_cpus = machine->smp.max_cpus; - Object *ram_memdev = object_resolve_path_type(machine->ram_memdev_id, - TYPE_MEMORY_BACKEND, NULL); + HostMemoryBackend *ram_memdev = machine->memdev; NICInfo *nd = &nd_table[0]; if (machine->ram_size > hwdef->max_mem) { @@ -852,7 +851,7 @@ static void sun4m_hw_init(MachineState *machine) /* Create and map RAM frontend */ dev = qdev_new("memory"); - object_property_set_link(OBJECT(dev), "memdev", ram_memdev, &error_fatal); + object_property_set_link(OBJECT(dev), "memdev", OBJECT(ram_memdev), &error_fatal); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0); @@ -1050,7 +1049,7 @@ static void sun4m_hw_init(MachineState *machine) machine->ram_size, &initrd_size); nvram_init(nvram, (uint8_t *)&nd->macaddr, machine->kernel_cmdline, - machine->boot_order, machine->ram_size, kernel_size, + machine->boot_config.order, machine->ram_size, kernel_size, graphic_width, graphic_height, graphic_depth, hwdef->nvram_machine_id, "Sun4m"); @@ -1091,7 +1090,7 @@ static void sun4m_hw_init(MachineState *machine) } fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, INITRD_LOAD_ADDR); fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, machine->boot_order[0]); + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, machine->boot_config.order[0]); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 7c461d194a..d1bc77d27e 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -695,7 +695,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, &kernel_addr, &kernel_entry); sun4u_NVRAM_set_params(nvram, NVRAM_SIZE, "Sun4u", machine->ram_size, - machine->boot_order, + machine->boot_config.order, kernel_addr, kernel_size, machine->kernel_cmdline, initrd_addr, initrd_size, @@ -727,7 +727,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, } fw_cfg_add_i64(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); fw_cfg_add_i64(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, machine->boot_order[0]); + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, machine->boot_config.order[0]); fw_cfg_add_i16(fw_cfg, FW_CFG_SPARC64_WIDTH, graphic_width); fw_cfg_add_i16(fw_cfg, FW_CFG_SPARC64_HEIGHT, graphic_height); diff --git a/hw/usb/hcd-dwc2.h b/hw/usb/hcd-dwc2.h index 6998b04706..9c3d88ea14 100644 --- a/hw/usb/hcd-dwc2.h +++ b/hw/usb/hcd-dwc2.h @@ -16,8 +16,8 @@ * GNU General Public License for more details. */ -#ifndef HW_USB_DWC2_H -#define HW_USB_DWC2_H +#ifndef HW_USB_HCD_DWC2_H +#define HW_USB_HCD_DWC2_H #include "qemu/timer.h" #include "hw/irq.h" diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 159f910421..29982c7af8 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -355,7 +355,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) } if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) - && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { + && (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { return false; } } @@ -381,8 +381,8 @@ static bool vfio_devices_all_running_and_saving(VFIOContainer *container) return false; } - if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && - (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { + if ((migration->device_state & VFIO_DEVICE_STATE_V1_SAVING) && + (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { continue; } else { return false; diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index ff6b45de6b..a6ad1f8945 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -432,7 +432,7 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) } ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_SAVING); + VFIO_DEVICE_STATE_V1_SAVING); if (ret) { error_report("%s: Failed to set state SAVING", vbasedev->name); return ret; @@ -531,8 +531,8 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) uint64_t data_size; int ret; - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING, - VFIO_DEVICE_STATE_SAVING); + ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING, + VFIO_DEVICE_STATE_V1_SAVING); if (ret) { error_report("%s: Failed to set state STOP and SAVING", vbasedev->name); @@ -569,7 +569,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) return ret; } - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0); + ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0); if (ret) { error_report("%s: Failed to set state STOPPED", vbasedev->name); return ret; @@ -609,7 +609,7 @@ static int vfio_load_setup(QEMUFile *f, void *opaque) } ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_RESUMING); + VFIO_DEVICE_STATE_V1_RESUMING); if (ret) { error_report("%s: Failed to set state RESUMING", vbasedev->name); if (migration->region.mmaps) { @@ -717,20 +717,20 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) * In both the above cases, set _RUNNING bit. */ mask = ~VFIO_DEVICE_STATE_MASK; - value = VFIO_DEVICE_STATE_RUNNING; + value = VFIO_DEVICE_STATE_V1_RUNNING; } else { /* * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset * _RUNNING bit */ - mask = ~VFIO_DEVICE_STATE_RUNNING; + mask = ~VFIO_DEVICE_STATE_V1_RUNNING; /* * When VM state transition to stop for savevm command, device should * start saving data. */ if (state == RUN_STATE_SAVE_VM) { - value = VFIO_DEVICE_STATE_SAVING; + value = VFIO_DEVICE_STATE_V1_SAVING; } else { value = 0; } @@ -768,8 +768,9 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) case MIGRATION_STATUS_FAILED: bytes_transferred = 0; ret = vfio_migration_set_state(vbasedev, - ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), - VFIO_DEVICE_STATE_RUNNING); + ~(VFIO_DEVICE_STATE_V1_SAVING | + VFIO_DEVICE_STATE_V1_RESUMING), + VFIO_DEVICE_STATE_V1_RUNNING); if (ret) { error_report("%s: Failed to set state RUNNING", vbasedev->name); } @@ -864,8 +865,10 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) goto add_blocker; } - ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, - VFIO_REGION_SUBTYPE_MIGRATION, &info); + ret = vfio_get_dev_region_info(vbasedev, + VFIO_REGION_TYPE_MIGRATION_DEPRECATED, + VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, + &info); if (ret) { goto add_blocker; } diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 333348d9d5..ab8e095b73 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -21,6 +21,9 @@ vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_siz vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64 vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 +vhost_user_read(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" +vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" +vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" # vhost-vdpa.c vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 @@ -89,7 +92,12 @@ virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 " virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d" virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d" -# virtio-iommu.c +# virtio-pci.c +virtio_pci_notify(uint16_t vector) "virtio_pci_notify vec 0x%x" +virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)" +virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)" + +# hw/virtio/virtio-iommu.c virtio_iommu_device_reset(void) "reset!" virtio_iommu_system_reset(void) "system reset!" virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64 diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index e409a865ae..4de8b6b3b0 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -203,7 +203,6 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) return idx - dev->vq_index; } -#ifdef CONFIG_VHOST_VSOCK static int vhost_kernel_vsock_set_guest_cid(struct vhost_dev *dev, uint64_t guest_cid) { @@ -214,7 +213,6 @@ static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start) { return vhost_kernel_call(dev, VHOST_VSOCK_SET_RUNNING, &start); } -#endif /* CONFIG_VHOST_VSOCK */ static void vhost_kernel_iotlb_read(void *opaque) { @@ -319,10 +317,8 @@ const VhostOps kernel_ops = { .vhost_set_owner = vhost_kernel_set_owner, .vhost_reset_device = vhost_kernel_reset_device, .vhost_get_vq_index = vhost_kernel_get_vq_index, -#ifdef CONFIG_VHOST_VSOCK .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid, .vhost_vsock_set_running = vhost_kernel_vsock_set_running, -#endif /* CONFIG_VHOST_VSOCK */ .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback, .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg, }; diff --git a/hw/virtio/vhost-scsi-pci.c b/hw/virtio/vhost-scsi-pci.c index cb71a294fa..08980bc23b 100644 --- a/hw/virtio/vhost-scsi-pci.c +++ b/hw/virtio/vhost-scsi-pci.c @@ -21,7 +21,7 @@ #include "hw/virtio/vhost-scsi.h" #include "qapi/error.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostSCSIPCI VHostSCSIPCI; diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1e5cfe2af6..56c96ebd13 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, for (n = 0; n < num; n++) { if (more_descs || (n + 1 < num)) { descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); + descs[i].next = cpu_to_le16(svq->desc_next[i]); } else { descs[i].flags = flags; } @@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, descs[i].len = cpu_to_le32(iovec[n].iov_len); last = i; - i = cpu_to_le16(descs[i].next); + i = cpu_to_le16(svq->desc_next[i]); } - svq->free_head = le16_to_cpu(descs[last].next); + svq->free_head = le16_to_cpu(svq->desc_next[last]); } static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, @@ -198,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, return true; } +/** + * Add an element to a SVQ. + * + * The caller must check that there is enough slots for the new element. It + * takes ownership of the element: In case of failure, it is free and the SVQ + * is considered broken. + */ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) { unsigned qemu_head; bool ok = vhost_svq_add_split(svq, elem, &qemu_head); if (unlikely(!ok)) { + g_free(elem); return false; } @@ -333,13 +342,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); } +static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, + uint16_t num, uint16_t i) +{ + for (uint16_t j = 0; j < (num - 1); ++j) { + i = le16_to_cpu(svq->desc_next[i]); + } + + return i; +} + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len) { - vring_desc_t *descs = svq->vring.desc; const vring_used_t *used = svq->vring.used; vring_used_elem_t used_elem; - uint16_t last_used; + uint16_t last_used, last_used_chain, num; if (!vhost_svq_more_used(svq)) { return NULL; @@ -365,7 +383,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, return NULL; } - descs[used_elem.id].next = svq->free_head; + num = svq->ring_id_maps[used_elem.id]->in_num + + svq->ring_id_maps[used_elem.id]->out_num; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; svq->free_head = used_elem.id; *len = used_elem.len; @@ -540,8 +561,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); memset(svq->vring.used, 0, device_size); svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); + svq->desc_next = g_new0(uint16_t, svq->vring.num); for (unsigned i = 0; i < svq->vring.num - 1; i++) { - svq->vring.desc[i].next = cpu_to_le16(i + 1); + svq->desc_next[i] = cpu_to_le16(i + 1); } } @@ -574,6 +596,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) virtqueue_detach_element(svq->vq, next_avail_elem, 0); } svq->vq = NULL; + g_free(svq->desc_next); g_free(svq->ring_id_maps); qemu_vfree(svq->vring.desc); qemu_vfree(svq->vring.used); diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index e5e24c536d..c132c994e9 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { /* Next VirtQueue element that guest made available */ VirtQueueElement *next_guest_avail_elem; + /* + * Backup next field for each descriptor so we can recover securely, not + * needing to trust the device access. + */ + uint16_t *desc_next; + /* Next head to expose to the device */ uint16_t shadow_avail_idx; diff --git a/hw/virtio/vhost-user-blk-pci.c b/hw/virtio/vhost-user-blk-pci.c index 33b404d8a2..eef8641a98 100644 --- a/hw/virtio/vhost-user-blk-pci.c +++ b/hw/virtio/vhost-user-blk-pci.c @@ -26,7 +26,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostUserBlkPCI VHostUserBlkPCI; diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c index 2ed8492b3f..6829b8b743 100644 --- a/hw/virtio/vhost-user-fs-pci.c +++ b/hw/virtio/vhost-user-fs-pci.c @@ -14,7 +14,7 @@ #include "qemu/osdep.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-fs.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" struct VHostUserFSPCI { diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index c595957983..e513e4fdda 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -219,8 +219,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS, - sizeof(struct virtio_fs_config)); + virtio_init(vdev, VIRTIO_ID_FS, sizeof(struct virtio_fs_config)); /* Hiprio queue */ fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output); @@ -277,6 +276,12 @@ static void vuf_device_unrealize(DeviceState *dev) fs->vhost_dev.vqs = NULL; } +static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev) +{ + VHostUserFS *fs = VHOST_USER_FS(vdev); + return &fs->vhost_dev; +} + static const VMStateDescription vuf_vmstate = { .name = "vhost-user-fs", .unmigratable = 1, @@ -314,6 +319,7 @@ static void vuf_class_init(ObjectClass *klass, void *data) vdc->set_status = vuf_set_status; vdc->guest_notifier_mask = vuf_guest_notifier_mask; vdc->guest_notifier_pending = vuf_guest_notifier_pending; + vdc->get_vhost = vuf_get_vhost; } static const TypeInfo vuf_info = { diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c index 70b7b65fd9..00ac10941f 100644 --- a/hw/virtio/vhost-user-i2c-pci.c +++ b/hw/virtio/vhost-user-i2c-pci.c @@ -9,7 +9,7 @@ #include "qemu/osdep.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-i2c.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" struct VHostUserI2CPCI { VirtIOPCIProxy parent_obj; diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index 42c7f6d9e5..6020eee093 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -14,11 +14,6 @@ #include "qemu/error-report.h" #include "standard-headers/linux/virtio_ids.h" -/* Remove this once the header is updated in Linux kernel */ -#ifndef VIRTIO_ID_I2C_ADAPTER -#define VIRTIO_ID_I2C_ADAPTER 34 -#endif - static const int feature_bits[] = { VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, VHOST_INVALID_FEATURE_BIT @@ -227,7 +222,7 @@ static void vu_i2c_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "vhost-user-i2c", VIRTIO_ID_I2C_ADAPTER, 0); + virtio_init(vdev, VIRTIO_ID_I2C_ADAPTER, 0); i2c->vhost_dev.nvqs = 1; i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output); diff --git a/hw/virtio/vhost-user-input-pci.c b/hw/virtio/vhost-user-input-pci.c index c9d3e9113a..b858898a36 100644 --- a/hw/virtio/vhost-user-input-pci.c +++ b/hw/virtio/vhost-user-input-pci.c @@ -9,7 +9,7 @@ #include "hw/virtio/virtio-input.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostUserInputPCI VHostUserInputPCI; diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c index c83dc86813..f64935453b 100644 --- a/hw/virtio/vhost-user-rng-pci.c +++ b/hw/virtio/vhost-user-rng-pci.c @@ -9,7 +9,7 @@ #include "qemu/osdep.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-rng.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" struct VHostUserRNGPCI { VirtIOPCIProxy parent_obj; diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index 209ee5bf9a..3a7bf8e32d 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -203,7 +203,7 @@ static void vu_rng_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "vhost-user-rng", VIRTIO_ID_RNG, 0); + virtio_init(vdev, VIRTIO_ID_RNG, 0); rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output); if (!rng->req_vq) { @@ -247,6 +247,12 @@ static void vu_rng_device_unrealize(DeviceState *dev) vhost_user_cleanup(&rng->vhost_user); } +static struct vhost_dev *vu_rng_get_vhost(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + return &rng->vhost_dev; +} + static const VMStateDescription vu_rng_vmstate = { .name = "vhost-user-rng", .unmigratable = 1, @@ -272,6 +278,7 @@ static void vu_rng_class_init(ObjectClass *klass, void *data) vdc->set_status = vu_rng_set_status; vdc->guest_notifier_mask = vu_rng_guest_notifier_mask; vdc->guest_notifier_pending = vu_rng_guest_notifier_pending; + vdc->get_vhost = vu_rng_get_vhost; } static const TypeInfo vu_rng_info = { diff --git a/hw/virtio/vhost-user-scsi-pci.c b/hw/virtio/vhost-user-scsi-pci.c index d5343412a1..75882e3cf9 100644 --- a/hw/virtio/vhost-user-scsi-pci.c +++ b/hw/virtio/vhost-user-scsi-pci.c @@ -30,7 +30,7 @@ #include "hw/pci/msix.h" #include "hw/loader.h" #include "sysemu/kvm.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostUserSCSIPCI VHostUserSCSIPCI; diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c index 72a96199cd..e5a86e8013 100644 --- a/hw/virtio/vhost-user-vsock-pci.c +++ b/hw/virtio/vhost-user-vsock-pci.c @@ -10,7 +10,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-vsock.h" #include "qom/object.h" diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 52bd682c34..0f8ff99f85 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -107,7 +107,7 @@ static void vuv_device_realize(DeviceState *dev, Error **errp) return; } - vhost_vsock_common_realize(vdev, "vhost-user-vsock"); + vhost_vsock_common_realize(vdev); vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops); diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index afd51f79b3..b040c1ad2b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -489,6 +489,8 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, return ret < 0 ? -saved_errno : -EIO; } + trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); + return 0; } @@ -542,6 +544,8 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, } } + trace_vhost_user_read(msg.hdr.request, msg.hdr.flags); + return 0; } @@ -1170,14 +1174,16 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) n->unmap_addr = NULL; } -static void vhost_user_host_notifier_remove(VhostUserState *user, - VirtIODevice *vdev, int queue_idx) +/* + * clean-up function for notifier, will finally free the structure + * under rcu. + */ +static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, + VirtIODevice *vdev) { - VhostUserHostNotifier *n = &user->notifier[queue_idx]; - if (n->addr) { if (vdev) { - virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); + virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); } assert(!n->unmap_addr); n->unmap_addr = n->addr; @@ -1221,6 +1227,15 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) return 0; } +static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, + int idx) +{ + if (idx >= u->notifiers->len) { + return NULL; + } + return g_ptr_array_index(u->notifiers, idx); +} + static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { @@ -1233,7 +1248,10 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, }; struct vhost_user *u = dev->opaque; - vhost_user_host_notifier_remove(u->user, dev->vdev, ring->index); + VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); + if (n) { + vhost_user_host_notifier_remove(n, dev->vdev); + } ret = vhost_user_write(dev, &msg, NULL, 0); if (ret < 0) { @@ -1498,6 +1516,29 @@ static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) return dev->config_ops->vhost_dev_config_notifier(dev); } +/* + * Fetch or create the notifier for a given idx. Newly created + * notifiers are added to the pointer array that tracks them. + */ +static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, + int idx) +{ + VhostUserHostNotifier *n = NULL; + if (idx >= u->notifiers->len) { + g_ptr_array_set_size(u->notifiers, idx); + } + + n = g_ptr_array_index(u->notifiers, idx); + if (!n) { + n = g_new0(VhostUserHostNotifier, 1); + n->idx = idx; + g_ptr_array_insert(u->notifiers, idx, n); + trace_vhost_user_create_notifier(idx, n); + } + + return n; +} + static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, VhostUserVringArea *area, int fd) @@ -1517,9 +1558,12 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, return -EINVAL; } - n = &user->notifier[queue_idx]; - - vhost_user_host_notifier_remove(user, vdev, queue_idx); + /* + * Fetch notifier and invalidate any old data before setting up + * new mapped address. + */ + n = fetch_or_create_notifier(user, queue_idx); + vhost_user_host_notifier_remove(n, vdev); if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { return 0; @@ -1945,14 +1989,15 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, Error **errp) { - uint64_t features, protocol_features, ram_slots; + uint64_t features, ram_slots; struct vhost_user *u; + VhostUserState *vus = (VhostUserState *) opaque; int err; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); u = g_new0(struct vhost_user, 1); - u->user = opaque; + u->user = vus; u->dev = dev; dev->opaque = u; @@ -1963,6 +2008,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { + bool supports_f_config = vus->supports_config || + (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); + uint64_t protocol_features; + dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, @@ -1972,19 +2021,34 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, return -EPROTO; } - dev->protocol_features = - protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; - - if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { - /* Don't acknowledge CONFIG feature if device doesn't support it */ - dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); - } else if (!(protocol_features & - (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { - error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " - "but backend does not support it."); - return -EINVAL; + /* + * We will use all the protocol features we support - although + * we suppress F_CONFIG if we know QEMUs internal code can not support + * it. + */ + protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; + + if (supports_f_config) { + if (!virtio_has_feature(protocol_features, + VHOST_USER_PROTOCOL_F_CONFIG)) { + error_setg(errp, "vhost-user device %s expecting " + "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " + "not support it.", dev->vdev->name); + return -EPROTO; + } + } else { + if (virtio_has_feature(protocol_features, + VHOST_USER_PROTOCOL_F_CONFIG)) { + warn_reportf_err(*errp, "vhost-user backend supports " + "VHOST_USER_PROTOCOL_F_CONFIG for " + "device %s but QEMU does not.", + dev->vdev->name); + protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); + } } + /* final set of protocol features */ + dev->protocol_features = protocol_features; err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); @@ -2502,6 +2566,20 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev, return vhost_user_write(dev, &msg, &inflight->fd, 1); } +static void vhost_user_state_destroy(gpointer data) +{ + VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; + if (n) { + vhost_user_host_notifier_remove(n, NULL); + object_unparent(OBJECT(&n->mr)); + /* + * We can't free until vhost_user_host_notifier_remove has + * done it's thing so schedule the free with RCU. + */ + g_free_rcu(n, rcu); + } +} + bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) { if (user->chr) { @@ -2510,23 +2588,18 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) } user->chr = chr; user->memory_slots = 0; + user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, + &vhost_user_state_destroy); return true; } void vhost_user_cleanup(VhostUserState *user) { - int i; - VhostUserHostNotifier *n; - if (!user->chr) { return; } memory_region_transaction_begin(); - for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { - n = &user->notifier[i]; - vhost_user_host_notifier_remove(user, NULL, i); - object_unparent(OBJECT(&n->mr)); - } + user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); memory_region_transaction_commit(); user->chr = NULL; } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index a30510ed17..66f054a12c 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -368,11 +368,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) v->iova_range.last); } -static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) +/* + * The use of this function is for requests that only need to be + * applied once. Typically such request occurs at the beginning + * of operation, and before setting up queues. It should not be + * used for request that performs operation until all queues are + * set, which would need to check dev->vq_index_end instead. + */ +static bool vhost_vdpa_first_dev(struct vhost_dev *dev) { struct vhost_vdpa *v = dev->opaque; - return v->index != 0; + return v->index == 0; } static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, @@ -453,7 +460,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_get_iova_range(v); - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } @@ -596,7 +603,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } @@ -625,7 +632,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, struct vhost_vdpa *v = dev->opaque; int ret; - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } @@ -667,7 +674,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) features &= f; - if (vhost_vdpa_one_time_request(dev)) { + if (vhost_vdpa_first_dev(dev)) { r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); if (r) { return -EFAULT; @@ -1018,7 +1025,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); struct vhost_vring_addr addr = { - .index = i, + .index = dev->vq_index + i, }; int r; bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err); @@ -1120,7 +1127,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { struct vhost_vdpa *v = dev->opaque; - if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { + if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { return 0; } @@ -1172,11 +1179,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { struct vhost_vdpa *v = dev->opaque; + int vdpa_idx = ring->index - dev->vq_index; int ret; if (v->shadow_vqs_enabled) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, - ring->index); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); /* * Setting base as last used idx, so destination will see as available @@ -1242,7 +1249,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index ed706681ac..7394818e00 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -224,12 +224,11 @@ int vhost_vsock_common_post_load(void *opaque, int version_id) return 0; } -void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name) +void vhost_vsock_common_realize(VirtIODevice *vdev) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - virtio_init(vdev, name, VIRTIO_ID_VSOCK, - sizeof(struct virtio_vsock_config)); + virtio_init(vdev, VIRTIO_ID_VSOCK, sizeof(struct virtio_vsock_config)); /* Receive and transmit queues belong to vhost */ vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, @@ -259,6 +258,12 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev) virtio_cleanup(vdev); } +static struct vhost_dev *vhost_vsock_common_get_vhost(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + return &vvc->vhost_dev; +} + static Property vhost_vsock_common_properties[] = { DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket, ON_OFF_AUTO_AUTO), @@ -274,6 +279,7 @@ static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask; vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending; + vdc->get_vhost = vhost_vsock_common_get_vhost; } static const TypeInfo vhost_vsock_common_info = { diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c index 205da8d1f5..9f34414d38 100644 --- a/hw/virtio/vhost-vsock-pci.c +++ b/hw/virtio/vhost-vsock-pci.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-vsock.h" #include "qemu/module.h" diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 714046210b..0338de892f 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -169,7 +169,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) } } - vhost_vsock_common_realize(vdev, "vhost-vsock"); + vhost_vsock_common_realize(vdev); ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd, VHOST_BACKEND_TYPE_KERNEL, 0, errp); diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 2bc72c27c5..dd3263df56 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1738,6 +1738,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) /* should only be called after backend is connected */ assert(hdev->vhost_ops); + vdev->vhost_started = true; hdev->started = true; hdev->vdev = vdev; @@ -1810,7 +1811,7 @@ fail_vq: fail_mem: fail_features: - + vdev->vhost_started = false; hdev->started = false; return r; } @@ -1841,6 +1842,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) } vhost_log_put(hdev, true); hdev->started = false; + vdev->vhost_started = false; hdev->vdev = NULL; } diff --git a/hw/virtio/virtio-9p-pci.c b/hw/virtio/virtio-9p-pci.c index e07adcd9ea..94c14f0b98 100644 --- a/hw/virtio/virtio-9p-pci.c +++ b/hw/virtio/virtio-9p-pci.c @@ -15,7 +15,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/9pfs/virtio-9p.h" #include "hw/qdev-properties.h" #include "qemu/module.h" diff --git a/hw/virtio/virtio-balloon-pci.c b/hw/virtio/virtio-balloon-pci.c index 79a3ba979a..ce2645ba71 100644 --- a/hw/virtio/virtio-balloon-pci.c +++ b/hw/virtio/virtio-balloon-pci.c @@ -14,7 +14,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-balloon.h" #include "qapi/error.h" diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 8f1b38ef5c..73ac5eb675 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -882,8 +882,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) VirtIOBalloon *s = VIRTIO_BALLOON(dev); int ret; - virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, - virtio_balloon_config_size(s)); + virtio_init(vdev, VIRTIO_ID_BALLOON, virtio_balloon_config_size(s)); ret = qemu_add_balloon_handler(virtio_balloon_to_target, virtio_balloon_stat, s); diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c index 9d5795810c..9743bee965 100644 --- a/hw/virtio/virtio-blk-pci.c +++ b/hw/virtio/virtio-blk-pci.c @@ -19,7 +19,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-blk.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qapi/error.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 0f69d1c742..d7ec023adf 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -78,17 +78,23 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) return; } - vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); - if (klass->get_dma_as != NULL && has_iommu) { + vdev->dma_as = &address_space_memory; + if (has_iommu) { + vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + /* + * Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and + * device operational. If the driver does not accept IOMMU_PLATFORM + * we fail the device. + */ virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); - vdev->dma_as = klass->get_dma_as(qbus->parent); - if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { - error_setg(errp, + if (klass->get_dma_as) { + vdev->dma_as = klass->get_dma_as(qbus->parent); + if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { + error_setg(errp, "iommu_platform=true is not supported by the device"); - return; + return; + } } - } else { - vdev->dma_as = &address_space_memory; } } diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index dcd80b904d..c3829e7498 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) } out_num = elem->out_num; - out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); out_iov = out_iov_copy; in_num = elem->in_num; @@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) } out_num = elem->out_num; - out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); out_iov = out_iov_copy; in_num = elem->in_num; - in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num); + in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num); in_iov = in_iov_copy; if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) @@ -810,7 +810,7 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-crypto", VIRTIO_ID_CRYPTO, vcrypto->config_size); + virtio_init(vdev, VIRTIO_ID_CRYPTO, vcrypto->config_size); vcrypto->curr_queues = 1; vcrypto->vqs = g_new0(VirtIOCryptoQueue, vcrypto->max_queues); for (i = 0; i < vcrypto->max_queues; i++) { @@ -961,6 +961,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); } +static struct vhost_dev *virtio_crypto_get_vhost(VirtIODevice *vdev) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + CryptoDevBackend *b = vcrypto->cryptodev; + CryptoDevBackendClient *cc = b->conf.peers.ccs[0]; + CryptoDevBackendVhost *vhost_crypto = cryptodev_get_vhost(cc, b, 0); + return &vhost_crypto->dev; +} + static void virtio_crypto_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -977,6 +986,7 @@ static void virtio_crypto_class_init(ObjectClass *klass, void *data) vdc->set_status = virtio_crypto_set_status; vdc->guest_notifier_mask = virtio_crypto_guest_notifier_mask; vdc->guest_notifier_pending = virtio_crypto_guest_notifier_pending; + vdc->get_vhost = virtio_crypto_get_vhost; } static void virtio_crypto_instance_init(Object *obj) diff --git a/hw/virtio/virtio-input-host-pci.c b/hw/virtio/virtio-input-host-pci.c index 0ac360de4f..cf8a9cf9e8 100644 --- a/hw/virtio/virtio-input-host-pci.c +++ b/hw/virtio/virtio-input-host-pci.c @@ -8,7 +8,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-input.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/virtio/virtio-input-pci.c b/hw/virtio/virtio-input-pci.c index 48e9ff38e2..a9d0992389 100644 --- a/hw/virtio/virtio-input-pci.c +++ b/hw/virtio/virtio-input-pci.c @@ -8,7 +8,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-input.h" #include "qemu/module.h" diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index 6a1df7fe50..844d647704 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-iommu.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 57c09d98a9..2597e166f9 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -1033,8 +1033,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOIOMMU *s = VIRTIO_IOMMU(dev); - virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU, - sizeof(struct virtio_iommu_config)); + virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config)); memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num)); diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 5aca408726..30d03e987a 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -867,8 +867,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) vmem->block_size; vmem->bitmap = bitmap_new(vmem->bitmap_size); - virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM, - sizeof(struct virtio_mem_config)); + virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config)); vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); host_memory_backend_set_mapped(vmem->memdev, true); diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index aa0b3caecb..e03543a70a 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -19,7 +19,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-net.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qapi/error.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7cf1231c1c..0566ad7d00 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -33,11 +33,12 @@ #include "hw/pci/msix.h" #include "hw/loader.h" #include "sysemu/kvm.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qemu/range.h" #include "hw/virtio/virtio-bus.h" #include "qapi/visitor.h" #include "sysemu/replay.h" +#include "trace.h" #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) @@ -1380,6 +1381,7 @@ static void virtio_pci_notify_write(void *opaque, hwaddr addr, unsigned queue = addr / virtio_pci_queue_mem_mult(proxy); if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) { + trace_virtio_pci_notify_write(addr, val, size); virtio_queue_notify(vdev, queue); } } @@ -1393,6 +1395,7 @@ static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr, unsigned queue = val; if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) { + trace_virtio_pci_notify_write_pio(addr, val, size); virtio_queue_notify(vdev, queue); } } diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index 5dd21c2c44..a1abfe0e1b 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -122,8 +122,7 @@ static void virtio_pmem_realize(DeviceState *dev, Error **errp) } host_memory_backend_set_mapped(pmem->memdev, true); - virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM, - sizeof(struct virtio_pmem_config)); + virtio_init(vdev, VIRTIO_ID_PMEM, sizeof(struct virtio_pmem_config)); pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush); } diff --git a/hw/virtio/virtio-rng-pci.c b/hw/virtio/virtio-rng-pci.c index c1f916268b..151ece6f94 100644 --- a/hw/virtio/virtio-rng-pci.c +++ b/hw/virtio/virtio-rng-pci.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-rng.h" #include "qapi/error.h" #include "qemu/module.h" diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index cc8e9f775d..7e12fc03bf 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -215,7 +215,7 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-rng", VIRTIO_ID_RNG, 0); + virtio_init(vdev, VIRTIO_ID_RNG, 0); vrng->vq = virtio_add_queue(vdev, 8, handle_input); vrng->quota_remaining = vrng->conf.max_bytes; diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c index 97fab74236..e8e3442f38 100644 --- a/hw/virtio/virtio-scsi-pci.c +++ b/hw/virtio/virtio-scsi-pci.c @@ -18,7 +18,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-scsi.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VirtIOSCSIPCI VirtIOSCSIPCI; diff --git a/hw/virtio/virtio-serial-pci.c b/hw/virtio/virtio-serial-pci.c index 35bcd961c9..cea31adcc4 100644 --- a/hw/virtio/virtio-serial-pci.c +++ b/hw/virtio/virtio-serial-pci.c @@ -20,7 +20,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-serial.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VirtIOSerialPCI VirtIOSerialPCI; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 67a873f54a..5d607aeaa0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -132,6 +132,56 @@ struct VirtQueue QLIST_ENTRY(VirtQueue) node; }; +const char *virtio_device_names[] = { + [VIRTIO_ID_NET] = "virtio-net", + [VIRTIO_ID_BLOCK] = "virtio-blk", + [VIRTIO_ID_CONSOLE] = "virtio-serial", + [VIRTIO_ID_RNG] = "virtio-rng", + [VIRTIO_ID_BALLOON] = "virtio-balloon", + [VIRTIO_ID_IOMEM] = "virtio-iomem", + [VIRTIO_ID_RPMSG] = "virtio-rpmsg", + [VIRTIO_ID_SCSI] = "virtio-scsi", + [VIRTIO_ID_9P] = "virtio-9p", + [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan", + [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial", + [VIRTIO_ID_CAIF] = "virtio-caif", + [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon", + [VIRTIO_ID_GPU] = "virtio-gpu", + [VIRTIO_ID_CLOCK] = "virtio-clk", + [VIRTIO_ID_INPUT] = "virtio-input", + [VIRTIO_ID_VSOCK] = "vhost-vsock", + [VIRTIO_ID_CRYPTO] = "virtio-crypto", + [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal", + [VIRTIO_ID_PSTORE] = "virtio-pstore", + [VIRTIO_ID_IOMMU] = "virtio-iommu", + [VIRTIO_ID_MEM] = "virtio-mem", + [VIRTIO_ID_SOUND] = "virtio-sound", + [VIRTIO_ID_FS] = "virtio-user-fs", + [VIRTIO_ID_PMEM] = "virtio-pmem", + [VIRTIO_ID_RPMB] = "virtio-rpmb", + [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim", + [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder", + [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder", + [VIRTIO_ID_SCMI] = "virtio-scmi", + [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod", + [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c", + [VIRTIO_ID_WATCHDOG] = "virtio-watchdog", + [VIRTIO_ID_CAN] = "virtio-can", + [VIRTIO_ID_DMABUF] = "virtio-dmabuf", + [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv", + [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol", + [VIRTIO_ID_BT] = "virtio-bluetooth", + [VIRTIO_ID_GPIO] = "virtio-gpio" +}; + +static const char *virtio_id_to_name(uint16_t device_id) +{ + assert(device_id < G_N_ELEMENTS(virtio_device_names)); + const char *name = virtio_device_names[device_id]; + assert(name != NULL); + return name; +} + /* Called within call_rcu(). */ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { @@ -3207,8 +3257,7 @@ void virtio_instance_init_common(Object *proxy_obj, void *data, qdev_alias_all_properties(vdev, proxy_obj); } -void virtio_init(VirtIODevice *vdev, const char *name, - uint16_t device_id, size_t config_size) +void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size) { BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); @@ -3222,6 +3271,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->start_on_kick = false; vdev->started = false; + vdev->vhost_started = false; vdev->device_id = device_id; vdev->status = 0; qatomic_set(&vdev->isr, 0); @@ -3237,7 +3287,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->vq[i].host_notifier_enabled = false; } - vdev->name = name; + vdev->name = virtio_id_to_name(device_id); vdev->config_len = config_size; if (vdev->config_len) { vdev->config = g_malloc0(config_size); diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 027190fa44..0ec7e52183 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -60,7 +60,6 @@ #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "hw/xen/xen.h" -#include "hw/i386/pc.h" #include "hw/xen/xen-legacy-backend.h" #include "xen_pt.h" #include "qemu/range.h" @@ -702,17 +701,6 @@ static const MemoryListener xen_pt_io_listener = { .priority = 10, }; -static void -xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, - XenHostPCIDevice *dev) -{ - uint16_t gpu_dev_id; - PCIDevice *d = &s->dev; - - gpu_dev_id = dev->device_id; - igd_passthrough_isa_bridge_create(pci_get_bus(d), gpu_dev_id); -} - /* destroy. */ static void xen_pt_destroy(PCIDevice *d) { diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index 6b8e13cdee..e7c4316a7d 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -43,6 +43,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(XenPCIPassthroughState, XEN_PT_DEVICE) uint32_t igd_read_opregion(XenPCIPassthroughState *s); void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val); +void xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, + XenHostPCIDevice *dev); /* function type for config reg */ typedef int (*xen_pt_conf_reg_init) diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c index a3bc7e3921..f303f67c9c 100644 --- a/hw/xen/xen_pt_graphics.c +++ b/hw/xen/xen_pt_graphics.c @@ -289,3 +289,125 @@ void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val) (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT)); } + +typedef struct { + uint16_t gpu_device_id; + uint16_t pch_device_id; + uint8_t pch_revision_id; +} IGDDeviceIDInfo; + +/* + * In real world different GPU should have different PCH. But actually + * the different PCH DIDs likely map to different PCH SKUs. We do the + * same thing for the GPU. For PCH, the different SKUs are going to be + * all the same silicon design and implementation, just different + * features turn on and off with fuses. The SW interfaces should be + * consistent across all SKUs in a given family (eg LPT). But just same + * features may not be supported. + * + * Most of these different PCH features probably don't matter to the + * Gfx driver, but obviously any difference in display port connections + * will so it should be fine with any PCH in case of passthrough. + * + * So currently use one PCH version, 0x8c4e, to cover all HSW(Haswell) + * scenarios, 0x9cc3 for BDW(Broadwell). + */ +static const IGDDeviceIDInfo igd_combo_id_infos[] = { + /* HSW Classic */ + {0x0402, 0x8c4e, 0x04}, /* HSWGT1D, HSWD_w7 */ + {0x0406, 0x8c4e, 0x04}, /* HSWGT1M, HSWM_w7 */ + {0x0412, 0x8c4e, 0x04}, /* HSWGT2D, HSWD_w7 */ + {0x0416, 0x8c4e, 0x04}, /* HSWGT2M, HSWM_w7 */ + {0x041E, 0x8c4e, 0x04}, /* HSWGT15D, HSWD_w7 */ + /* HSW ULT */ + {0x0A06, 0x8c4e, 0x04}, /* HSWGT1UT, HSWM_w7 */ + {0x0A16, 0x8c4e, 0x04}, /* HSWGT2UT, HSWM_w7 */ + {0x0A26, 0x8c4e, 0x06}, /* HSWGT3UT, HSWM_w7 */ + {0x0A2E, 0x8c4e, 0x04}, /* HSWGT3UT28W, HSWM_w7 */ + {0x0A1E, 0x8c4e, 0x04}, /* HSWGT2UX, HSWM_w7 */ + {0x0A0E, 0x8c4e, 0x04}, /* HSWGT1ULX, HSWM_w7 */ + /* HSW CRW */ + {0x0D26, 0x8c4e, 0x04}, /* HSWGT3CW, HSWM_w7 */ + {0x0D22, 0x8c4e, 0x04}, /* HSWGT3CWDT, HSWD_w7 */ + /* HSW Server */ + {0x041A, 0x8c4e, 0x04}, /* HSWSVGT2, HSWD_w7 */ + /* HSW SRVR */ + {0x040A, 0x8c4e, 0x04}, /* HSWSVGT1, HSWD_w7 */ + /* BSW */ + {0x1606, 0x9cc3, 0x03}, /* BDWULTGT1, BDWM_w7 */ + {0x1616, 0x9cc3, 0x03}, /* BDWULTGT2, BDWM_w7 */ + {0x1626, 0x9cc3, 0x03}, /* BDWULTGT3, BDWM_w7 */ + {0x160E, 0x9cc3, 0x03}, /* BDWULXGT1, BDWM_w7 */ + {0x161E, 0x9cc3, 0x03}, /* BDWULXGT2, BDWM_w7 */ + {0x1602, 0x9cc3, 0x03}, /* BDWHALOGT1, BDWM_w7 */ + {0x1612, 0x9cc3, 0x03}, /* BDWHALOGT2, BDWM_w7 */ + {0x1622, 0x9cc3, 0x03}, /* BDWHALOGT3, BDWM_w7 */ + {0x162B, 0x9cc3, 0x03}, /* BDWHALO28W, BDWM_w7 */ + {0x162A, 0x9cc3, 0x03}, /* BDWGT3WRKS, BDWM_w7 */ + {0x162D, 0x9cc3, 0x03}, /* BDWGT3SRVR, BDWM_w7 */ +}; + +static void isa_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + dc->desc = "ISA bridge faked to support IGD PT"; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->class_id = PCI_CLASS_BRIDGE_ISA; +}; + +static const TypeInfo isa_bridge_info = { + .name = "igd-passthrough-isa-bridge", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = isa_bridge_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +static void pt_graphics_register_types(void) +{ + type_register_static(&isa_bridge_info); +} +type_init(pt_graphics_register_types) + +void xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, + XenHostPCIDevice *dev) +{ + PCIBus *bus = pci_get_bus(&s->dev); + struct PCIDevice *bridge_dev; + int i, num; + const uint16_t gpu_dev_id = dev->device_id; + uint16_t pch_dev_id = 0xffff; + uint8_t pch_rev_id = 0; + + num = ARRAY_SIZE(igd_combo_id_infos); + for (i = 0; i < num; i++) { + if (gpu_dev_id == igd_combo_id_infos[i].gpu_device_id) { + pch_dev_id = igd_combo_id_infos[i].pch_device_id; + pch_rev_id = igd_combo_id_infos[i].pch_revision_id; + } + } + + if (pch_dev_id == 0xffff) { + return; + } + + /* Currently IGD drivers always need to access PCH by 1f.0. */ + bridge_dev = pci_create_simple(bus, PCI_DEVFN(0x1f, 0), + "igd-passthrough-isa-bridge"); + + /* + * Note that vendor id is always PCI_VENDOR_ID_INTEL. + */ + if (!bridge_dev) { + fprintf(stderr, "set igd-passthrough-isa-bridge failed!\n"); + return; + } + pci_config_set_device_id(bridge_dev->config, pch_dev_id); + pci_config_set_revision(bridge_dev->config, pch_rev_id); +} diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h index 3412e108ca..50ce0247c3 100644 --- a/include/block/block-hmp-cmds.h +++ b/include/block/block-hmp-cmds.h @@ -12,8 +12,8 @@ * the COPYING file in the top-level directory. */ -#ifndef BLOCK_HMP_COMMANDS_H -#define BLOCK_HMP_COMMANDS_H +#ifndef BLOCK_BLOCK_HMP_CMDS_H +#define BLOCK_BLOCK_HMP_CMDS_H void hmp_drive_add(Monitor *mon, const QDict *qdict); diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index 8b2e95f5ff..b49f4eb35b 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + #ifndef BLOCK_INT_GLOBAL_STATE_H #define BLOCK_INT_GLOBAL_STATE_H @@ -326,4 +327,4 @@ static inline void assert_bdrv_graph_writable(BlockDriverState *bs) assert(qemu_in_main_thread()); } -#endif /* BLOCK_INT_GLOBAL_STATE */ +#endif /* BLOCK_INT_GLOBAL_STATE_H */ diff --git a/include/block/nbd.h b/include/block/nbd.h index a98eb665da..c74b7a9d2e 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2020 Red Hat, Inc. + * Copyright (C) 2016-2022 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device @@ -344,8 +344,9 @@ void nbd_client_new(QIOChannelSocket *sioc, void nbd_client_get(NBDClient *client); void nbd_client_put(NBDClient *client); -void nbd_server_is_qemu_nbd(bool value); +void nbd_server_is_qemu_nbd(int max_connections); bool nbd_server_is_running(void); +int nbd_server_max_connections(void); void nbd_server_start(SocketAddress *addr, const char *tls_creds, const char *tls_authz, uint32_t max_connections, Error **errp); diff --git a/include/chardev/char-socket.h b/include/chardev/char-socket.h index 6b6e2ceba1..0708ca6fa9 100644 --- a/include/chardev/char-socket.h +++ b/include/chardev/char-socket.h @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef CHAR_SOCKET_H_ -#define CHAR_SOCKET_H_ + +#ifndef CHAR_SOCKET_H +#define CHAR_SOCKET_H #include "io/channel-socket.h" #include "io/channel-tls.h" @@ -83,4 +84,4 @@ typedef struct SocketChardev SocketChardev; DECLARE_INSTANCE_CHECKER(SocketChardev, SOCKET_CHARDEV, TYPE_CHARDEV_SOCKET) -#endif /* CHAR_SOCKET_H_ */ +#endif /* CHAR_SOCKET_H */ diff --git a/include/crypto/tls-cipher-suites.h b/include/crypto/tls-cipher-suites.h index 7eb1b76122..3bd2003f32 100644 --- a/include/crypto/tls-cipher-suites.h +++ b/include/crypto/tls-cipher-suites.h @@ -8,8 +8,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef QCRYPTO_TLSCIPHERSUITES_H -#define QCRYPTO_TLSCIPHERSUITES_H +#ifndef QCRYPTO_TLS_CIPHER_SUITES_H +#define QCRYPTO_TLS_CIPHER_SUITES_H #include "qom/object.h" #include "crypto/tlscreds.h" @@ -31,4 +31,4 @@ DECLARE_INSTANCE_CHECKER(QCryptoTLSCipherSuites, QCRYPTO_TLS_CIPHER_SUITES, GByteArray *qcrypto_tls_cipher_suites_get_data(QCryptoTLSCipherSuites *obj, Error **errp); -#endif /* QCRYPTO_TLSCIPHERSUITES_H */ +#endif /* QCRYPTO_TLS_CIPHER_SUITES_H */ diff --git a/include/exec/memopidx.h b/include/exec/memopidx.h index 83bce97874..eb7f1591a3 100644 --- a/include/exec/memopidx.h +++ b/include/exec/memopidx.h @@ -9,7 +9,7 @@ */ #ifndef EXEC_MEMOPIDX_H -#define EXEC_MEMOPIDX_H 1 +#define EXEC_MEMOPIDX_H #include "exec/memop.h" diff --git a/include/exec/translator.h b/include/exec/translator.h index 31d3fa76ff..7db6845535 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -187,4 +187,4 @@ FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) #undef GEN_TRANSLATOR_LD -#endif /* EXEC__TRANSLATOR_H */ +#endif /* EXEC__TRANSLATOR_H */ diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h index a98d759cd3..94cbe073ec 100644 --- a/include/fpu/softfloat-helpers.h +++ b/include/fpu/softfloat-helpers.h @@ -141,4 +141,4 @@ static inline bool get_default_nan_mode(float_status *status) return status->default_nan_mode; } -#endif /* _SOFTFLOAT_HELPERS_H_ */ +#endif /* SOFTFLOAT_HELPERS_H */ diff --git a/include/hw/acpi/cxl.h b/include/hw/acpi/cxl.h new file mode 100644 index 0000000000..0c496538c0 --- /dev/null +++ b/include/hw/acpi/cxl.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HW_ACPI_CXL_H +#define HW_ACPI_CXL_H + +#include "hw/acpi/bios-linker-loader.h" + +void cxl_build_cedt(MachineState *ms, GArray *table_offsets, GArray *table_data, + BIOSLinker *linker, const char *oem_id, + const char *oem_table_id); +void build_cxl_osc_method(Aml *dev); + +#endif diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h index d49217c445..d831bbd889 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h @@ -56,8 +56,8 @@ * */ -#ifndef HW_ACPI_GED_H -#define HW_ACPI_GED_H +#ifndef HW_ACPI_GENERIC_EVENT_DEVICE_H +#define HW_ACPI_GENERIC_EVENT_DEVICE_H #include "hw/sysbus.h" #include "hw/acpi/memory_hotplug.h" diff --git a/include/hw/audio/soundhw.h b/include/hw/audio/soundhw.h index e68685fcda..270717a06a 100644 --- a/include/hw/audio/soundhw.h +++ b/include/hw/audio/soundhw.h @@ -2,11 +2,12 @@ #define HW_SOUNDHW_H void pci_register_soundhw(const char *name, const char *descr, - int (*init_pci)(PCIBus *bus)); + int (*init_pci)(PCIBus *bus, const char *audiodev)); void deprecated_register_soundhw(const char *name, const char *descr, int isa, const char *typename); void soundhw_init(void); -void select_soundhw(const char *optarg); +void show_valid_soundhw(void); +void select_soundhw(const char *optarg, const char *audiodev); #endif diff --git a/include/hw/boards.h b/include/hw/boards.h index d64b5481e8..fa57bac4fb 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -25,7 +25,7 @@ OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE) extern MachineState *current_machine; -void machine_run_board_init(MachineState *machine); +void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp); bool machine_usb(MachineState *machine); int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); @@ -269,6 +269,7 @@ struct MachineClass { bool ignore_boot_device_suffixes; bool smbus_no_migration_support; bool nvdimm_supported; + bool cxl_supported; bool numa_mem_supported; bool auto_enable_numa; SMPCompatProps smp_props; @@ -339,7 +340,7 @@ struct MachineState { bool suppress_vmdesc; bool enable_graphics; ConfidentialGuestSupport *cgs; - char *ram_memdev_id; + HostMemoryBackend *memdev; /* * convenience alias to ram_memdev_id backend memory region * or to numa container memory region @@ -350,8 +351,7 @@ struct MachineState { ram_addr_t ram_size; ram_addr_t maxram_size; uint64_t ram_slots; - const char *boot_order; - const char *boot_once; + BootConfiguration boot_config; char *kernel_filename; char *kernel_cmdline; char *initrd_filename; @@ -360,6 +360,7 @@ struct MachineState { CPUArchIdList *possible_cpus; CpuTopology smp; struct NVDIMMState *nvdimms_state; + struct CXLState *cxl_devices_state; struct NumaState *numa_state; }; diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h new file mode 100644 index 0000000000..21d28ca110 --- /dev/null +++ b/include/hw/cxl/cxl.h @@ -0,0 +1,61 @@ +/* + * QEMU CXL Support + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_H +#define CXL_H + + +#include "qapi/qapi-types-machine.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "cxl_pci.h" +#include "cxl_component.h" +#include "cxl_device.h" + +#define CXL_COMPONENT_REG_BAR_IDX 0 +#define CXL_DEVICE_REG_BAR_IDX 2 + +#define CXL_WINDOW_MAX 10 + +typedef struct CXLFixedWindow { + uint64_t size; + char **targets; + struct PXBDev *target_hbs[8]; + uint8_t num_targets; + uint8_t enc_int_ways; + uint8_t enc_int_gran; + /* Todo: XOR based interleaving */ + MemoryRegion mr; + hwaddr base; +} CXLFixedWindow; + +typedef struct CXLState { + bool is_enabled; + MemoryRegion host_mr; + unsigned int next_mr_idx; + GList *fixed_windows; +} CXLState; + +struct CXLHost { + PCIHostState parent_obj; + + CXLComponentState cxl_cstate; +}; + +#define TYPE_PXB_CXL_HOST "pxb-cxl-host" +OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST) + +void cxl_fixed_memory_window_config(MachineState *ms, + CXLFixedMemoryWindowOptions *object, + Error **errp); +void cxl_fixed_memory_window_link_targets(Error **errp); + +extern const MemoryRegionOps cfmws_ops; + +#endif diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h new file mode 100644 index 0000000000..70b5018156 --- /dev/null +++ b/include/hw/cxl/cxl_component.h @@ -0,0 +1,223 @@ +/* + * QEMU CXL Component + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_COMPONENT_H +#define CXL_COMPONENT_H + +/* CXL 2.0 - 8.2.4 */ +#define CXL2_COMPONENT_IO_REGION_SIZE 0x1000 +#define CXL2_COMPONENT_CM_REGION_SIZE 0x1000 +#define CXL2_COMPONENT_BLOCK_SIZE 0x10000 + +#include "qemu/compiler.h" +#include "qemu/range.h" +#include "qemu/typedefs.h" +#include "hw/register.h" + +enum reg_type { + CXL2_DEVICE, + CXL2_TYPE3_DEVICE, + CXL2_LOGICAL_DEVICE, + CXL2_ROOT_PORT, + CXL2_UPSTREAM_PORT, + CXL2_DOWNSTREAM_PORT +}; + +/* + * Capability registers are defined at the top of the CXL.cache/mem region and + * are packed. For our purposes we will always define the caps in the same + * order. + * CXL 2.0 - 8.2.5 Table 142 for details. + */ + +/* CXL 2.0 - 8.2.5.1 */ +REG32(CXL_CAPABILITY_HEADER, 0) + FIELD(CXL_CAPABILITY_HEADER, ID, 0, 16) + FIELD(CXL_CAPABILITY_HEADER, VERSION, 16, 4) + FIELD(CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 20, 4) + FIELD(CXL_CAPABILITY_HEADER, ARRAY_SIZE, 24, 8) + +#define CXLx_CAPABILITY_HEADER(type, offset) \ + REG32(CXL_##type##_CAPABILITY_HEADER, offset) \ + FIELD(CXL_##type##_CAPABILITY_HEADER, ID, 0, 16) \ + FIELD(CXL_##type##_CAPABILITY_HEADER, VERSION, 16, 4) \ + FIELD(CXL_##type##_CAPABILITY_HEADER, PTR, 20, 12) +CXLx_CAPABILITY_HEADER(RAS, 0x4) +CXLx_CAPABILITY_HEADER(LINK, 0x8) +CXLx_CAPABILITY_HEADER(HDM, 0xc) +CXLx_CAPABILITY_HEADER(EXTSEC, 0x10) +CXLx_CAPABILITY_HEADER(SNOOP, 0x14) + +/* + * Capability structures contain the actual registers that the CXL component + * implements. Some of these are specific to certain types of components, but + * this implementation leaves enough space regardless. + */ +/* 8.2.5.9 - CXL RAS Capability Structure */ + +/* Give ample space for caps before this */ +#define CXL_RAS_REGISTERS_OFFSET 0x80 +#define CXL_RAS_REGISTERS_SIZE 0x58 +REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET) +REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4) +REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8) +REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc) +REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10) +REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14) +/* Offset 0x18 - 0x58 reserved for RAS logs */ + +/* 8.2.5.10 - CXL Security Capability Structure */ +#define CXL_SEC_REGISTERS_OFFSET \ + (CXL_RAS_REGISTERS_OFFSET + CXL_RAS_REGISTERS_SIZE) +#define CXL_SEC_REGISTERS_SIZE 0 /* We don't implement 1.1 downstream ports */ + +/* 8.2.5.11 - CXL Link Capability Structure */ +#define CXL_LINK_REGISTERS_OFFSET \ + (CXL_SEC_REGISTERS_OFFSET + CXL_SEC_REGISTERS_SIZE) +#define CXL_LINK_REGISTERS_SIZE 0x38 + +/* 8.2.5.12 - CXL HDM Decoder Capability Structure */ +#define HDM_DECODE_MAX 10 /* 8.2.5.12.1 */ +#define CXL_HDM_REGISTERS_OFFSET \ + (CXL_LINK_REGISTERS_OFFSET + CXL_LINK_REGISTERS_SIZE) +#define CXL_HDM_REGISTERS_SIZE (0x10 + 0x20 * HDM_DECODE_MAX) +#define HDM_DECODER_INIT(n) \ + REG32(CXL_HDM_DECODER##n##_BASE_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x10) \ + FIELD(CXL_HDM_DECODER##n##_BASE_LO, L, 28, 4) \ + REG32(CXL_HDM_DECODER##n##_BASE_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x14) \ + REG32(CXL_HDM_DECODER##n##_SIZE_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x18) \ + REG32(CXL_HDM_DECODER##n##_SIZE_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x1C) \ + REG32(CXL_HDM_DECODER##n##_CTRL, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x20) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, IG, 0, 4) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, IW, 4, 4) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, LOCK_ON_COMMIT, 8, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, COMMIT, 9, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, COMMITTED, 10, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, ERR, 11, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, TYPE, 12, 1) \ + REG32(CXL_HDM_DECODER##n##_TARGET_LIST_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \ + REG32(CXL_HDM_DECODER##n##_TARGET_LIST_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28) + +REG32(CXL_HDM_DECODER_CAPABILITY, CXL_HDM_REGISTERS_OFFSET) + FIELD(CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, 0, 4) + FIELD(CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 4, 4) + FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 8, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 9, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 10, 1) +REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4) + FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, POISON_ON_ERR_EN, 0, 1) + FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1) + +HDM_DECODER_INIT(0); + +/* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */ +#define EXTSEC_ENTRY_MAX 256 +#define CXL_EXTSEC_REGISTERS_OFFSET \ + (CXL_HDM_REGISTERS_OFFSET + CXL_HDM_REGISTERS_SIZE) +#define CXL_EXTSEC_REGISTERS_SIZE (8 * EXTSEC_ENTRY_MAX + 4) + +/* 8.2.5.14 - CXL IDE Capability Structure */ +#define CXL_IDE_REGISTERS_OFFSET \ + (CXL_EXTSEC_REGISTERS_OFFSET + CXL_EXTSEC_REGISTERS_SIZE) +#define CXL_IDE_REGISTERS_SIZE 0x20 + +/* 8.2.5.15 - CXL Snoop Filter Capability Structure */ +#define CXL_SNOOP_REGISTERS_OFFSET \ + (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE) +#define CXL_SNOOP_REGISTERS_SIZE 0x8 + +QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE) >= 0x1000, + "No space for registers"); + +typedef struct component_registers { + /* + * Main memory region to be registered with QEMU core. + */ + MemoryRegion component_registers; + + /* + * 8.2.4 Table 141: + * 0x0000 - 0x0fff CXL.io registers + * 0x1000 - 0x1fff CXL.cache and CXL.mem + * 0x2000 - 0xdfff Implementation specific + * 0xe000 - 0xe3ff CXL ARB/MUX registers + * 0xe400 - 0xffff RSVD + */ + uint32_t io_registers[CXL2_COMPONENT_IO_REGION_SIZE >> 2]; + MemoryRegion io; + + uint32_t cache_mem_registers[CXL2_COMPONENT_CM_REGION_SIZE >> 2]; + uint32_t cache_mem_regs_write_mask[CXL2_COMPONENT_CM_REGION_SIZE >> 2]; + MemoryRegion cache_mem; + + MemoryRegion impl_specific; + MemoryRegion arb_mux; + MemoryRegion rsvd; + + /* special_ops is used for any component that needs any specific handling */ + MemoryRegionOps *special_ops; +} ComponentRegisters; + +/* + * A CXL component represents all entities in a CXL hierarchy. This includes, + * host bridges, root ports, upstream/downstream switch ports, and devices + */ +typedef struct cxl_component { + ComponentRegisters crb; + union { + struct { + Range dvsecs[CXL20_MAX_DVSEC]; + uint16_t dvsec_offset; + struct PCIDevice *pdev; + }; + }; +} CXLComponentState; + +void cxl_component_register_block_init(Object *obj, + CXLComponentState *cxl_cstate, + const char *type); +void cxl_component_register_init_common(uint32_t *reg_state, + uint32_t *write_msk, + enum reg_type type); + +void cxl_component_create_dvsec(CXLComponentState *cxl_cstate, + enum reg_type cxl_dev_type, uint16_t length, + uint16_t type, uint8_t rev, uint8_t *body); + +static inline int cxl_decoder_count_enc(int count) +{ + switch (count) { + case 1: return 0; + case 2: return 1; + case 4: return 2; + case 6: return 3; + case 8: return 4; + case 10: return 5; + } + return 0; +} + +uint8_t cxl_interleave_ways_enc(int iw, Error **errp); +uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp); + +static inline hwaddr cxl_decode_ig(int ig) +{ + return 1 << (ig + 8); +} + +CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb); + +#endif diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h new file mode 100644 index 0000000000..1e141b6621 --- /dev/null +++ b/include/hw/cxl/cxl_device.h @@ -0,0 +1,268 @@ +/* + * QEMU CXL Devices + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_DEVICE_H +#define CXL_DEVICE_H + +#include "hw/register.h" + +/* + * The following is how a CXL device's Memory Device registers are laid out. + * The only requirement from the spec is that the capabilities array and the + * capability headers start at offset 0 and are contiguously packed. The headers + * themselves provide offsets to the register fields. For this emulation, the + * actual registers * will start at offset 0x80 (m == 0x80). No secondary + * mailbox is implemented which means that the offset of the start of the + * mailbox payload (n) is given by + * n = m + sizeof(mailbox registers) + sizeof(device registers). + * + * +---------------------------------+ + * | | + * | Memory Device Registers | + * | | + * n + PAYLOAD_SIZE_MAX ----------------------------------- + * ^ | | + * | | | + * | | | + * | | | + * | | | + * | | Mailbox Payload | + * | | | + * | | | + * | | | + * n ----------------------------------- + * ^ | Mailbox Registers | + * | | | + * | ----------------------------------- + * | | | + * | | Device Registers | + * | | | + * m ----------------------------------> + * ^ | Memory Device Capability Header| + * | ----------------------------------- + * | | Mailbox Capability Header | + * | ----------------------------------- + * | | Device Capability Header | + * | ----------------------------------- + * | | Device Cap Array Register | + * 0 +---------------------------------+ + * + */ + +#define CXL_DEVICE_CAP_HDR1_OFFSET 0x10 /* Figure 138 */ +#define CXL_DEVICE_CAP_REG_SIZE 0x10 /* 8.2.8.2 */ +#define CXL_DEVICE_CAPS_MAX 4 /* 8.2.8.2.1 + 8.2.8.5 */ +#define CXL_CAPS_SIZE \ + (CXL_DEVICE_CAP_REG_SIZE * (CXL_DEVICE_CAPS_MAX + 1)) /* +1 for header */ + +#define CXL_DEVICE_STATUS_REGISTERS_OFFSET 0x80 /* Read comment above */ +#define CXL_DEVICE_STATUS_REGISTERS_LENGTH 0x8 /* 8.2.8.3.1 */ + +#define CXL_MAILBOX_REGISTERS_OFFSET \ + (CXL_DEVICE_STATUS_REGISTERS_OFFSET + CXL_DEVICE_STATUS_REGISTERS_LENGTH) +#define CXL_MAILBOX_REGISTERS_SIZE 0x20 /* 8.2.8.4, Figure 139 */ +#define CXL_MAILBOX_PAYLOAD_SHIFT 11 +#define CXL_MAILBOX_MAX_PAYLOAD_SIZE (1 << CXL_MAILBOX_PAYLOAD_SHIFT) +#define CXL_MAILBOX_REGISTERS_LENGTH \ + (CXL_MAILBOX_REGISTERS_SIZE + CXL_MAILBOX_MAX_PAYLOAD_SIZE) + +#define CXL_MEMORY_DEVICE_REGISTERS_OFFSET \ + (CXL_MAILBOX_REGISTERS_OFFSET + CXL_MAILBOX_REGISTERS_LENGTH) +#define CXL_MEMORY_DEVICE_REGISTERS_LENGTH 0x8 + +#define CXL_MMIO_SIZE \ + (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \ + CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH) + +typedef struct cxl_device_state { + MemoryRegion device_registers; + + /* mmio for device capabilities array - 8.2.8.2 */ + MemoryRegion device; + MemoryRegion memory_device; + struct { + MemoryRegion caps; + union { + uint32_t caps_reg_state32[CXL_CAPS_SIZE / 4]; + uint64_t caps_reg_state64[CXL_CAPS_SIZE / 8]; + }; + }; + + /* mmio for the mailbox registers 8.2.8.4 */ + struct { + MemoryRegion mailbox; + uint16_t payload_size; + union { + uint8_t mbox_reg_state[CXL_MAILBOX_REGISTERS_LENGTH]; + uint16_t mbox_reg_state16[CXL_MAILBOX_REGISTERS_LENGTH / 2]; + uint32_t mbox_reg_state32[CXL_MAILBOX_REGISTERS_LENGTH / 4]; + uint64_t mbox_reg_state64[CXL_MAILBOX_REGISTERS_LENGTH / 8]; + }; + struct cel_log { + uint16_t opcode; + uint16_t effect; + } cel_log[1 << 16]; + size_t cel_size; + }; + + struct { + bool set; + uint64_t last_set; + uint64_t host_set; + } timestamp; + + /* memory region for persistent memory, HDM */ + uint64_t pmem_size; +} CXLDeviceState; + +/* Initialize the register block for a device */ +void cxl_device_register_block_init(Object *obj, CXLDeviceState *dev); + +/* Set up default values for the register block */ +void cxl_device_register_init_common(CXLDeviceState *dev); + +/* + * CXL 2.0 - 8.2.8.1 including errata F4 + * Documented as a 128 bit register, but 64 bit accesses and the second + * 64 bits are currently reserved. + */ +REG64(CXL_DEV_CAP_ARRAY, 0) /* Documented as 128 bit register but 64 byte accesses */ + FIELD(CXL_DEV_CAP_ARRAY, CAP_ID, 0, 16) + FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8) + FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16) + +/* + * Helper macro to initialize capability headers for CXL devices. + * + * In the 8.2.8.2, this is listed as a 128b register, but in 8.2.8, it says: + * > No registers defined in Section 8.2.8 are larger than 64-bits wide so that + * > is the maximum access size allowed for these registers. If this rule is not + * > followed, the behavior is undefined + * + * CXL 2.0 Errata F4 states futher that the layouts in the specification are + * shown as greater than 128 bits, but implementations are expected to + * use any size of access up to 64 bits. + * + * Here we've chosen to make it 4 dwords. The spec allows any pow2 multiple + * access to be used for a register up to 64 bits. + */ +#define CXL_DEVICE_CAPABILITY_HEADER_REGISTER(n, offset) \ + REG32(CXL_DEV_##n##_CAP_HDR0, offset) \ + FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_ID, 0, 16) \ + FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_VERSION, 16, 8) \ + REG32(CXL_DEV_##n##_CAP_HDR1, offset + 4) \ + FIELD(CXL_DEV_##n##_CAP_HDR1, CAP_OFFSET, 0, 32) \ + REG32(CXL_DEV_##n##_CAP_HDR2, offset + 8) \ + FIELD(CXL_DEV_##n##_CAP_HDR2, CAP_LENGTH, 0, 32) + +CXL_DEVICE_CAPABILITY_HEADER_REGISTER(DEVICE_STATUS, CXL_DEVICE_CAP_HDR1_OFFSET) +CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MAILBOX, CXL_DEVICE_CAP_HDR1_OFFSET + \ + CXL_DEVICE_CAP_REG_SIZE) +CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE, + CXL_DEVICE_CAP_HDR1_OFFSET + + CXL_DEVICE_CAP_REG_SIZE * 2) + +int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate); +void cxl_process_mailbox(CXLDeviceState *cxl_dstate); + +#define cxl_device_cap_init(dstate, reg, cap_id) \ + do { \ + uint32_t *cap_hdrs = dstate->caps_reg_state32; \ + int which = R_CXL_DEV_##reg##_CAP_HDR0; \ + cap_hdrs[which] = \ + FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, \ + CAP_ID, cap_id); \ + cap_hdrs[which] = FIELD_DP32( \ + cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, 1); \ + cap_hdrs[which + 1] = \ + FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1, \ + CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET); \ + cap_hdrs[which + 2] = \ + FIELD_DP32(cap_hdrs[which + 2], CXL_DEV_##reg##_CAP_HDR2, \ + CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH); \ + } while (0) + +/* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */ +REG32(CXL_DEV_MAILBOX_CAP, 0) + FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5) + FIELD(CXL_DEV_MAILBOX_CAP, INT_CAP, 5, 1) + FIELD(CXL_DEV_MAILBOX_CAP, BG_INT_CAP, 6, 1) + FIELD(CXL_DEV_MAILBOX_CAP, MSI_N, 7, 4) + +/* CXL 2.0 8.2.8.4.4 Mailbox Control Register */ +REG32(CXL_DEV_MAILBOX_CTRL, 4) + FIELD(CXL_DEV_MAILBOX_CTRL, DOORBELL, 0, 1) + FIELD(CXL_DEV_MAILBOX_CTRL, INT_EN, 1, 1) + FIELD(CXL_DEV_MAILBOX_CTRL, BG_INT_EN, 2, 1) + +/* CXL 2.0 8.2.8.4.5 Command Register */ +REG64(CXL_DEV_MAILBOX_CMD, 8) + FIELD(CXL_DEV_MAILBOX_CMD, COMMAND, 0, 8) + FIELD(CXL_DEV_MAILBOX_CMD, COMMAND_SET, 8, 8) + FIELD(CXL_DEV_MAILBOX_CMD, LENGTH, 16, 20) + +/* CXL 2.0 8.2.8.4.6 Mailbox Status Register */ +REG64(CXL_DEV_MAILBOX_STS, 0x10) + FIELD(CXL_DEV_MAILBOX_STS, BG_OP, 0, 1) + FIELD(CXL_DEV_MAILBOX_STS, ERRNO, 32, 16) + FIELD(CXL_DEV_MAILBOX_STS, VENDOR_ERRNO, 48, 16) + +/* CXL 2.0 8.2.8.4.7 Background Command Status Register */ +REG64(CXL_DEV_BG_CMD_STS, 0x18) + FIELD(CXL_DEV_BG_CMD_STS, OP, 0, 16) + FIELD(CXL_DEV_BG_CMD_STS, PERCENTAGE_COMP, 16, 7) + FIELD(CXL_DEV_BG_CMD_STS, RET_CODE, 32, 16) + FIELD(CXL_DEV_BG_CMD_STS, VENDOR_RET_CODE, 48, 16) + +/* CXL 2.0 8.2.8.4.8 Command Payload Registers */ +REG32(CXL_DEV_CMD_PAYLOAD, 0x20) + +REG64(CXL_MEM_DEV_STS, 0) + FIELD(CXL_MEM_DEV_STS, FATAL, 0, 1) + FIELD(CXL_MEM_DEV_STS, FW_HALT, 1, 1) + FIELD(CXL_MEM_DEV_STS, MEDIA_STATUS, 2, 2) + FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1) + FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3) + +struct CXLType3Dev { + /* Private */ + PCIDevice parent_obj; + + /* Properties */ + HostMemoryBackend *hostmem; + HostMemoryBackend *lsa; + + /* State */ + AddressSpace hostmem_as; + CXLComponentState cxl_cstate; + CXLDeviceState cxl_dstate; +}; + +#define TYPE_CXL_TYPE3 "cxl-type3" +OBJECT_DECLARE_TYPE(CXLType3Dev, CXLType3Class, CXL_TYPE3) + +struct CXLType3Class { + /* Private */ + PCIDeviceClass parent_class; + + /* public */ + uint64_t (*get_lsa_size)(CXLType3Dev *ct3d); + + uint64_t (*get_lsa)(CXLType3Dev *ct3d, void *buf, uint64_t size, + uint64_t offset); + void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size, + uint64_t offset); +}; + +MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, + unsigned size, MemTxAttrs attrs); +MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, + unsigned size, MemTxAttrs attrs); + +#endif diff --git a/include/hw/cxl/cxl_pci.h b/include/hw/cxl/cxl_pci.h new file mode 100644 index 0000000000..01cf002096 --- /dev/null +++ b/include/hw/cxl/cxl_pci.h @@ -0,0 +1,167 @@ +/* + * QEMU CXL PCI interfaces + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_PCI_H +#define CXL_PCI_H + +#include "qemu/compiler.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie.h" + +#define CXL_VENDOR_ID 0x1e98 + +#define PCIE_DVSEC_HEADER1_OFFSET 0x4 /* Offset from start of extend cap */ +#define PCIE_DVSEC_ID_OFFSET 0x8 + +#define PCIE_CXL_DEVICE_DVSEC_LENGTH 0x38 +#define PCIE_CXL1_DEVICE_DVSEC_REVID 0 +#define PCIE_CXL2_DEVICE_DVSEC_REVID 1 + +#define EXTENSIONS_PORT_DVSEC_LENGTH 0x28 +#define EXTENSIONS_PORT_DVSEC_REVID 0 + +#define GPF_PORT_DVSEC_LENGTH 0x10 +#define GPF_PORT_DVSEC_REVID 0 + +#define GPF_DEVICE_DVSEC_LENGTH 0x10 +#define GPF_DEVICE_DVSEC_REVID 0 + +#define PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0 0x14 +#define PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0 1 + +#define REG_LOC_DVSEC_LENGTH 0x24 +#define REG_LOC_DVSEC_REVID 0 + +enum { + PCIE_CXL_DEVICE_DVSEC = 0, + NON_CXL_FUNCTION_MAP_DVSEC = 2, + EXTENSIONS_PORT_DVSEC = 3, + GPF_PORT_DVSEC = 4, + GPF_DEVICE_DVSEC = 5, + PCIE_FLEXBUS_PORT_DVSEC = 7, + REG_LOC_DVSEC = 8, + MLD_DVSEC = 9, + CXL20_MAX_DVSEC +}; + +typedef struct DVSECHeader { + uint32_t cap_hdr; + uint32_t dv_hdr1; + uint16_t dv_hdr2; +} QEMU_PACKED DVSECHeader; +QEMU_BUILD_BUG_ON(sizeof(DVSECHeader) != 10); + +/* + * CXL 2.0 devices must implement certain DVSEC IDs, and can [optionally] + * implement others. + * + * CXL 2.0 Device: 0, [2], 5, 8 + * CXL 2.0 RP: 3, 4, 7, 8 + * CXL 2.0 Upstream Port: [2], 7, 8 + * CXL 2.0 Downstream Port: 3, 4, 7, 8 + */ + +/* CXL 2.0 - 8.1.3 (ID 0001) */ +typedef struct CXLDVSECDevice { + DVSECHeader hdr; + uint16_t cap; + uint16_t ctrl; + uint16_t status; + uint16_t ctrl2; + uint16_t status2; + uint16_t lock; + uint16_t cap2; + uint32_t range1_size_hi; + uint32_t range1_size_lo; + uint32_t range1_base_hi; + uint32_t range1_base_lo; + uint32_t range2_size_hi; + uint32_t range2_size_lo; + uint32_t range2_base_hi; + uint32_t range2_base_lo; +} CXLDVSECDevice; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDevice) != 0x38); + +/* CXL 2.0 - 8.1.5 (ID 0003) */ +typedef struct CXLDVSECPortExtensions { + DVSECHeader hdr; + uint16_t status; + uint16_t control; + uint8_t alt_bus_base; + uint8_t alt_bus_limit; + uint16_t alt_memory_base; + uint16_t alt_memory_limit; + uint16_t alt_prefetch_base; + uint16_t alt_prefetch_limit; + uint32_t alt_prefetch_base_high; + uint32_t alt_prefetch_limit_high; + uint32_t rcrb_base; + uint32_t rcrb_base_high; +} CXLDVSECPortExtensions; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortExtensions) != 0x28); + +#define PORT_CONTROL_OFFSET 0xc +#define PORT_CONTROL_UNMASK_SBR 1 +#define PORT_CONTROL_ALT_MEMID_EN 4 + +/* CXL 2.0 - 8.1.6 GPF DVSEC (ID 0004) */ +typedef struct CXLDVSECPortGPF { + DVSECHeader hdr; + uint16_t rsvd; + uint16_t phase1_ctrl; + uint16_t phase2_ctrl; +} CXLDVSECPortGPF; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortGPF) != 0x10); + +/* CXL 2.0 - 8.1.7 GPF DVSEC for CXL Device */ +typedef struct CXLDVSECDeviceGPF { + DVSECHeader hdr; + uint16_t phase2_duration; + uint32_t phase2_power; +} CXLDVSECDeviceGPF; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDeviceGPF) != 0x10); + +/* CXL 2.0 - 8.1.8/8.2.1.3 Flex Bus DVSEC (ID 0007) */ +typedef struct CXLDVSECPortFlexBus { + DVSECHeader hdr; + uint16_t cap; + uint16_t ctrl; + uint16_t status; + uint32_t rcvd_mod_ts_data_phase1; +} CXLDVSECPortFlexBus; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortFlexBus) != 0x14); + +/* CXL 2.0 - 8.1.9 Register Locator DVSEC (ID 0008) */ +typedef struct CXLDVSECRegisterLocator { + DVSECHeader hdr; + uint16_t rsvd; + uint32_t reg0_base_lo; + uint32_t reg0_base_hi; + uint32_t reg1_base_lo; + uint32_t reg1_base_hi; + uint32_t reg2_base_lo; + uint32_t reg2_base_hi; +} CXLDVSECRegisterLocator; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECRegisterLocator) != 0x24); + +/* BAR Equivalence Indicator */ +#define BEI_BAR_10H 0 +#define BEI_BAR_14H 1 +#define BEI_BAR_18H 2 +#define BEI_BAR_1cH 3 +#define BEI_BAR_20H 4 +#define BEI_BAR_24H 5 + +/* Register Block Identifier */ +#define RBI_EMPTY 0 +#define RBI_COMPONENT_REG (1 << 8) +#define RBI_BAR_VIRT_ACL (2 << 8) +#define RBI_CXL_DEVICE_REG (3 << 8) + +#endif diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h index 801846befb..6dee3cd438 100644 --- a/include/hw/gpio/aspeed_gpio.h +++ b/include/hw/gpio/aspeed_gpio.h @@ -93,4 +93,4 @@ struct AspeedGPIOState { } sets[ASPEED_GPIO_MAX_NR_SETS]; }; -#endif /* _ASPEED_GPIO_H_ */ +#endif /* ASPEED_GPIO_H */ diff --git a/include/hw/i2c/arm_sbcon_i2c.h b/include/hw/i2c/arm_sbcon_i2c.h index ad96781e7a..f54d1e5413 100644 --- a/include/hw/i2c/arm_sbcon_i2c.h +++ b/include/hw/i2c/arm_sbcon_i2c.h @@ -9,8 +9,9 @@ * * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef HW_I2C_ARM_SBCON_H -#define HW_I2C_ARM_SBCON_H + +#ifndef HW_I2C_ARM_SBCON_I2C_H +#define HW_I2C_ARM_SBCON_I2C_H #include "hw/sysbus.h" #include "hw/i2c/bitbang_i2c.h" @@ -34,4 +35,4 @@ struct ArmSbconI2CState { int in; }; -#endif /* HW_I2C_ARM_SBCON_H */ +#endif /* HW_I2C_ARM_SBCON_I2C_H */ diff --git a/include/hw/i2c/i2c_mux_pca954x.h b/include/hw/i2c/i2c_mux_pca954x.h index 8aaf9bbc39..3dd25ec983 100644 --- a/include/hw/i2c/i2c_mux_pca954x.h +++ b/include/hw/i2c/i2c_mux_pca954x.h @@ -1,5 +1,5 @@ -#ifndef QEMU_I2C_MUX_PCA954X -#define QEMU_I2C_MUX_PCA954X +#ifndef QEMU_I2C_MUX_PCA954X_H +#define QEMU_I2C_MUX_PCA954X_H #include "hw/i2c/i2c.h" diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index bfa982a419..67653b0f9b 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -267,6 +267,7 @@ struct IntelIOMMUState { bool buggy_eim; /* Force buggy EIM unless eim=off */ uint8_t aw_bits; /* Host/IOVA address width (in bits) */ bool dma_drain; /* Whether DMA r/w draining enabled */ + bool dma_translation; /* Whether DMA translation supported */ /* * Protects IOMMU states in general. Currently it protects the diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index efcbd926fd..fad97a891d 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -67,8 +67,6 @@ #define PCIE_ECAM_SIZE 0x10000000 /* Machine type options */ -#define MICROVM_MACHINE_PIT "pit" -#define MICROVM_MACHINE_PIC "pic" #define MICROVM_MACHINE_RTC "rtc" #define MICROVM_MACHINE_PCIE "pcie" #define MICROVM_MACHINE_IOAPIC2 "ioapic2" @@ -86,8 +84,6 @@ struct MicrovmMachineState { X86MachineState parent; /* Machine type options */ - OnOffAuto pic; - OnOffAuto pit; OnOffAuto rtc; OnOffAuto pcie; OnOffAuto ioapic2; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 637367dc5f..ffcac5121e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -46,7 +46,6 @@ typedef struct PCMachineState { bool acpi_build_enabled; bool smbus_enabled; bool sata_enabled; - bool pit_enabled; bool hpet_enabled; bool i8042_enabled; bool default_bus_bypass_iommu; @@ -64,7 +63,6 @@ typedef struct PCMachineState { #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMBUS "smbus" #define PC_MACHINE_SATA "sata" -#define PC_MACHINE_PIT "pit" #define PC_MACHINE_I8042 "i8042" #define PC_MACHINE_MAX_FW_SIZE "max-fw-size" #define PC_MACHINE_SMBIOS_EP "smbios-entry-point-type" @@ -315,5 +313,4 @@ extern const size_t pc_compat_1_4_len; } \ type_init(pc_machine_init_##suffix) -extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); #endif diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 4841a49f86..9089bdd99c 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -65,6 +65,8 @@ struct X86MachineState { OnOffAuto smm; OnOffAuto acpi; + OnOffAuto pit; + OnOffAuto pic; char *oem_id; char *oem_table_id; @@ -84,6 +86,8 @@ struct X86MachineState { #define X86_MACHINE_SMM "smm" #define X86_MACHINE_ACPI "acpi" +#define X86_MACHINE_PIT "pit" +#define X86_MACHINE_PIC "pic" #define X86_MACHINE_OEM_ID "x-oem-id" #define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id" #define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit" diff --git a/include/hw/input/lm832x.h b/include/hw/input/lm832x.h index 2a58ccf891..e0e5d5ef20 100644 --- a/include/hw/input/lm832x.h +++ b/include/hw/input/lm832x.h @@ -18,8 +18,8 @@ * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef HW_INPUT_LM832X -#define HW_INPUT_LM832X +#ifndef HW_INPUT_LM832X_H +#define HW_INPUT_LM832X_H #define TYPE_LM8323 "lm8323" diff --git a/include/hw/intc/exynos4210_combiner.h b/include/hw/intc/exynos4210_combiner.h index 429844fed4..bd207a7e6e 100644 --- a/include/hw/intc/exynos4210_combiner.h +++ b/include/hw/intc/exynos4210_combiner.h @@ -20,8 +20,8 @@ * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef HW_INTC_EXYNOS4210_COMBINER -#define HW_INTC_EXYNOS4210_COMBINER +#ifndef HW_INTC_EXYNOS4210_COMBINER_H +#define HW_INTC_EXYNOS4210_COMBINER_H #include "hw/sysbus.h" diff --git a/include/hw/intc/nios2_vic.h b/include/hw/intc/nios2_vic.h index af1517a967..ac507b9d74 100644 --- a/include/hw/intc/nios2_vic.h +++ b/include/hw/intc/nios2_vic.h @@ -32,8 +32,8 @@ * THE SOFTWARE. */ -#ifndef HW_INTC_NIOS2_VIC -#define HW_INTC_NIOS2_VIC +#ifndef HW_INTC_NIOS2_VIC_H +#define HW_INTC_NIOS2_VIC_H #define TYPE_NIOS2_VIC "nios2-vic" OBJECT_DECLARE_SIMPLE_TYPE(Nios2VIC, NIOS2_VIC) @@ -61,4 +61,4 @@ struct Nios2VIC { uint32_t vec_tbl_addr; }; -#endif /* HW_INTC_NIOS2_VIC */ +#endif /* HW_INTC_NIOS2_VIC_H */ diff --git a/include/hw/intc/rx_icu.h b/include/hw/intc/rx_icu.h index 7f5889b36f..b23504f3dd 100644 --- a/include/hw/intc/rx_icu.h +++ b/include/hw/intc/rx_icu.h @@ -73,4 +73,4 @@ struct RXICUState { #define TYPE_RX_ICU "rx-icu" OBJECT_DECLARE_SIMPLE_TYPE(RXICUState, RX_ICU) -#endif /* RX_ICU_H */ +#endif /* HW_INTC_RX_ICU_H */ diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h index 4f9ce179bf..ecb1b67de8 100644 --- a/include/hw/misc/aspeed_hace.h +++ b/include/hw/misc/aspeed_hace.h @@ -47,4 +47,4 @@ struct AspeedHACEClass { uint32_t hash_mask; }; -#endif /* _ASPEED_HACE_H_ */ +#endif /* ASPEED_HACE_H */ diff --git a/include/hw/misc/aspeed_lpc.h b/include/hw/misc/aspeed_lpc.h index df418cfcd3..fd228731d2 100644 --- a/include/hw/misc/aspeed_lpc.h +++ b/include/hw/misc/aspeed_lpc.h @@ -44,4 +44,4 @@ typedef struct AspeedLPCState { uint32_t hicr7; } AspeedLPCState; -#endif /* _ASPEED_LPC_H_ */ +#endif /* ASPEED_LPC_H */ diff --git a/include/hw/misc/aspeed_sbc.h b/include/hw/misc/aspeed_sbc.h index 651747e28f..67e43b53ec 100644 --- a/include/hw/misc/aspeed_sbc.h +++ b/include/hw/misc/aspeed_sbc.h @@ -29,4 +29,4 @@ struct AspeedSBCClass { SysBusDeviceClass parent_class; }; -#endif /* _ASPEED_SBC_H_ */ +#endif /* ASPEED_SBC_H */ diff --git a/include/hw/misc/bcm2835_cprman.h b/include/hw/misc/bcm2835_cprman.h index 3df4ceedd2..0d38036728 100644 --- a/include/hw/misc/bcm2835_cprman.h +++ b/include/hw/misc/bcm2835_cprman.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef HW_MISC_CPRMAN_H -#define HW_MISC_CPRMAN_H +#ifndef HW_MISC_BCM2835_CPRMAN_H +#define HW_MISC_BCM2835_CPRMAN_H #include "hw/sysbus.h" #include "hw/qdev-clock.h" diff --git a/include/hw/misc/bcm2835_cprman_internals.h b/include/hw/misc/bcm2835_cprman_internals.h index 339759b307..7617aff96f 100644 --- a/include/hw/misc/bcm2835_cprman_internals.h +++ b/include/hw/misc/bcm2835_cprman_internals.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef HW_MISC_CPRMAN_INTERNALS_H -#define HW_MISC_CPRMAN_INTERNALS_H +#ifndef HW_MISC_BCM2835_CPRMAN_INTERNALS_H +#define HW_MISC_BCM2835_CPRMAN_INTERNALS_H #include "hw/registerfields.h" #include "hw/misc/bcm2835_cprman.h" diff --git a/include/hw/misc/stm32f4xx_exti.h b/include/hw/misc/stm32f4xx_exti.h index ea6b0097b0..fc11c595fa 100644 --- a/include/hw/misc/stm32f4xx_exti.h +++ b/include/hw/misc/stm32f4xx_exti.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef HW_STM_EXTI_H -#define HW_STM_EXTI_H +#ifndef HW_STM32F4XX_EXTI_H +#define HW_STM32F4XX_EXTI_H #include "hw/sysbus.h" #include "qom/object.h" diff --git a/include/hw/misc/stm32f4xx_syscfg.h b/include/hw/misc/stm32f4xx_syscfg.h index 6f8ca49228..9fce67f4b4 100644 --- a/include/hw/misc/stm32f4xx_syscfg.h +++ b/include/hw/misc/stm32f4xx_syscfg.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef HW_STM_SYSCFG_H -#define HW_STM_SYSCFG_H +#ifndef HW_STM32F4XX_SYSCFG_H +#define HW_STM32F4XX_SYSCFG_H #include "hw/sysbus.h" #include "qom/object.h" diff --git a/include/hw/misc/xlnx-versal-pmc-iou-slcr.h b/include/hw/misc/xlnx-versal-pmc-iou-slcr.h index ab4e4b4f18..2170420f01 100644 --- a/include/hw/misc/xlnx-versal-pmc-iou-slcr.h +++ b/include/hw/misc/xlnx-versal-pmc-iou-slcr.h @@ -51,8 +51,8 @@ * 1: OSPI direct access mode. */ -#ifndef XILINX_VERSAL_PMC_IOU_SLCR_H -#define XILINX_VERSAL_PMC_IOU_SLCR_H +#ifndef XLNX_VERSAL_PMC_IOU_SLCR_H +#define XLNX_VERSAL_PMC_IOU_SLCR_H #include "hw/register.h" @@ -75,4 +75,4 @@ struct XlnxVersalPmcIouSlcr { RegisterInfo regs_info[XILINX_VERSAL_PMC_IOU_SLCR_R_MAX]; }; -#endif /* XILINX_VERSAL_PMC_IOU_SLCR_H */ +#endif /* XLNX_VERSAL_PMC_IOU_SLCR_H */ diff --git a/include/hw/net/allwinner-sun8i-emac.h b/include/hw/net/allwinner-sun8i-emac.h index 460a58f1ca..185895f4e1 100644 --- a/include/hw/net/allwinner-sun8i-emac.h +++ b/include/hw/net/allwinner-sun8i-emac.h @@ -101,4 +101,4 @@ struct AwSun8iEmacState { }; -#endif /* HW_NET_ALLWINNER_SUN8I_H */ +#endif /* HW_NET_ALLWINNER_SUN8I_EMAC_H */ diff --git a/include/hw/net/mv88w8618_eth.h b/include/hw/net/mv88w8618_eth.h index 8f4c746092..41074940ec 100644 --- a/include/hw/net/mv88w8618_eth.h +++ b/include/hw/net/mv88w8618_eth.h @@ -4,8 +4,9 @@ * * Copyright (c) 2008-2021 QEMU contributors */ -#ifndef HW_NET_MV88W8618_H -#define HW_NET_MV88W8618_H + +#ifndef HW_NET_MV88W8618_ETH_H +#define HW_NET_MV88W8618_ETH_H #define TYPE_MV88W8618_ETH "mv88w8618_eth" diff --git a/include/hw/nubus/mac-nubus-bridge.h b/include/hw/nubus/mac-nubus-bridge.h index 70ab50ab2d..be4dd83530 100644 --- a/include/hw/nubus/mac-nubus-bridge.h +++ b/include/hw/nubus/mac-nubus-bridge.h @@ -6,8 +6,8 @@ * */ -#ifndef HW_NUBUS_MAC_H -#define HW_NUBUS_MAC_H +#ifndef HW_NUBUS_MAC_NUBUS_BRIDGE_H +#define HW_NUBUS_MAC_NUBUS_BRIDGE_H #include "hw/nubus/nubus.h" #include "qom/object.h" diff --git a/include/hw/pci-host/remote.h b/include/hw/pci-host/remote.h index 3dcf6aa51d..690a01f0fe 100644 --- a/include/hw/pci-host/remote.h +++ b/include/hw/pci-host/remote.h @@ -8,8 +8,8 @@ * */ -#ifndef REMOTE_PCIHOST_H -#define REMOTE_PCIHOST_H +#ifndef PCI_HOST_REMOTE_H +#define PCI_HOST_REMOTE_H #include "exec/memory.h" #include "hw/pci/pcie_host.h" diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 3a32b8dd40..44dacfa224 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -194,6 +194,8 @@ enum { QEMU_PCIE_LNKSTA_DLLLA = (1 << QEMU_PCIE_LNKSTA_DLLLA_BITNR), #define QEMU_PCIE_EXTCAP_INIT_BITNR 9 QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), +#define QEMU_PCIE_CXL_BITNR 10 + QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), }; #define TYPE_PCI_DEVICE "pci-device" @@ -201,6 +203,12 @@ typedef struct PCIDeviceClass PCIDeviceClass; DECLARE_OBJ_CHECKERS(PCIDevice, PCIDeviceClass, PCI_DEVICE, TYPE_PCI_DEVICE) +/* + * Implemented by devices that can be plugged on CXL buses. In the spec, this is + * actually a "CXL Component, but we name it device to match the PCI naming. + */ +#define INTERFACE_CXL_DEVICE "cxl-device" + /* Implemented by devices that can be plugged on PCI Express buses */ #define INTERFACE_PCIE_DEVICE "pci-express-device" @@ -400,6 +408,7 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); #define TYPE_PCI_BUS "PCI" OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS) #define TYPE_PCIE_BUS "PCIE" +#define TYPE_CXL_BUS "CXL" typedef void (*pci_bus_dev_fn)(PCIBus *b, PCIDevice *d, void *opaque); typedef void (*pci_bus_fn)(PCIBus *b, void *opaque); @@ -762,6 +771,11 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) pci_irq_deassert(pci_dev); } +static inline int pci_is_cxl(const PCIDevice *d) +{ + return d->cap_present & QEMU_PCIE_CAP_CXL; +} + static inline int pci_is_express(const PCIDevice *d) { return d->cap_present & QEMU_PCI_CAP_EXPRESS; diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 30691a6e57..ba4bafac7c 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -28,6 +28,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" +#include "hw/cxl/cxl.h" #include "qom/object.h" typedef struct PCIBridgeWindows PCIBridgeWindows; @@ -80,6 +81,25 @@ struct PCIBridge { #define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr" #define PCI_BRIDGE_DEV_PROP_MSI "msi" #define PCI_BRIDGE_DEV_PROP_SHPC "shpc" +typedef struct CXLHost CXLHost; + +struct PXBDev { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + uint8_t bus_nr; + uint16_t numa_node; + bool bypass_iommu; + struct cxl_dev { + CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */ + } cxl; +}; + +typedef struct PXBDev PXBDev; +#define TYPE_PXB_CXL_DEVICE "pxb-cxl" +DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV, + TYPE_PXB_CXL_DEVICE) int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset, uint16_t svid, uint16_t ssid, diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index 347440d42c..eb94e7e85c 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -24,6 +24,8 @@ enum PCIBusFlags { PCI_BUS_IS_ROOT = 0x0001, /* PCIe extended configuration space is accessible on this bus */ PCI_BUS_EXTENDED_CONFIG_SPACE = 0x0002, + /* This is a CXL Type BUS */ + PCI_BUS_CXL = 0x0004, }; struct PCIBus { @@ -53,6 +55,11 @@ struct PCIBus { Notifier machine_done; }; +static inline bool pci_bus_is_cxl(PCIBus *bus) +{ + return !!(bus->flags & PCI_BUS_CXL); +} + static inline bool pci_bus_is_root(PCIBus *bus) { return !!(bus->flags & PCI_BUS_IS_ROOT); diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 11abe22d46..898083b86f 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -53,6 +53,7 @@ #define PCI_BASE_CLASS_MEMORY 0x05 #define PCI_CLASS_MEMORY_RAM 0x0500 #define PCI_CLASS_MEMORY_FLASH 0x0501 +#define PCI_CLASS_MEMORY_CXL 0x0502 #define PCI_CLASS_MEMORY_OTHER 0x0580 #define PCI_BASE_CLASS_BRIDGE 0x06 diff --git a/include/hw/pci/pcie_host.h b/include/hw/pci/pcie_host.h index 076457b270..82d92177da 100644 --- a/include/hw/pci/pcie_host.h +++ b/include/hw/pci/pcie_host.h @@ -60,15 +60,15 @@ void pcie_host_mmcfg_update(PCIExpressHost *e, /* * PCI express ECAM (Enhanced Configuration Address Mapping) format. * AKA mmcfg address - * bit 20 - 28: bus number + * bit 20 - 27: bus number * bit 15 - 19: device number * bit 12 - 14: function number * bit 0 - 11: offset in configuration space of a given device */ -#define PCIE_MMCFG_SIZE_MAX (1ULL << 29) +#define PCIE_MMCFG_SIZE_MAX (1ULL << 28) #define PCIE_MMCFG_SIZE_MIN (1ULL << 20) #define PCIE_MMCFG_BUS_BIT 20 -#define PCIE_MMCFG_BUS_MASK 0x1ff +#define PCIE_MMCFG_BUS_MASK 0xff #define PCIE_MMCFG_DEVFN_BIT 12 #define PCIE_MMCFG_DEVFN_MASK 0xff #define PCIE_MMCFG_CONFOFFSET_MASK 0xfff diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index e25b289ce8..7b8193061a 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -39,6 +39,8 @@ struct PCIEPort { void pcie_port_init_reg(PCIDevice *d); +PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn); + #define TYPE_PCIE_SLOT "pcie-slot" OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT) diff --git a/include/hw/ppc/pnv_pnor.h b/include/hw/ppc/pnv_pnor.h index 99f9a3adfb..bab2f79844 100644 --- a/include/hw/ppc/pnv_pnor.h +++ b/include/hw/ppc/pnv_pnor.h @@ -6,8 +6,10 @@ * This code is licensed under the GPL version 2 or later. See the * COPYING file in the top-level directory. */ -#ifndef _PPC_PNV_PNOR_H -#define _PPC_PNV_PNOR_H + +#ifndef PPC_PNV_PNOR_H +#define PPC_PNV_PNOR_H + #include "qom/object.h" /* @@ -28,4 +30,4 @@ struct PnvPnor { MemoryRegion mmio; }; -#endif /* _PPC_PNV_PNOR_H */ +#endif /* PPC_PNV_PNOR_H */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index f7925f67d0..e1df08876c 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -17,6 +17,7 @@ struct Property { const PropertyInfo *info; ptrdiff_t offset; uint8_t bitnr; + uint64_t bitmask; bool set_default; union { int64_t i; @@ -54,6 +55,7 @@ extern const PropertyInfo qdev_prop_uint16; extern const PropertyInfo qdev_prop_uint32; extern const PropertyInfo qdev_prop_int32; extern const PropertyInfo qdev_prop_uint64; +extern const PropertyInfo qdev_prop_uint64_checkmask; extern const PropertyInfo qdev_prop_int64; extern const PropertyInfo qdev_prop_size; extern const PropertyInfo qdev_prop_string; @@ -103,6 +105,16 @@ extern const PropertyInfo qdev_prop_link; .set_default = true, \ .defval.u = (bool)_defval) +/** + * The DEFINE_PROP_UINT64_CHECKMASK macro checks a user-supplied value + * against corresponding bitmask, rejects the value if it violates. + * The default value is set in instance_init(). + */ +#define DEFINE_PROP_UINT64_CHECKMASK(_name, _state, _field, _bitmask) \ + DEFINE_PROP(_name, _state, _field, qdev_prop_uint64_checkmask, uint64_t, \ + .bitmask = (_bitmask), \ + .set_default = false) + #define PROP_ARRAY_LEN_PREFIX "len-" /** diff --git a/include/hw/riscv/boot_opensbi.h b/include/hw/riscv/boot_opensbi.h index 0d5ddd6c3d..c19cad4818 100644 --- a/include/hw/riscv/boot_opensbi.h +++ b/include/hw/riscv/boot_opensbi.h @@ -4,8 +4,9 @@ * * Based on include/sbi/{fw_dynamic.h,sbi_scratch.h} from the OpenSBI project. */ -#ifndef OPENSBI_H -#define OPENSBI_H + +#ifndef RISCV_BOOT_OPENSBI_H +#define RISCV_BOOT_OPENSBI_H /** Expected value of info magic ('OSBI' ascii string in hex) */ #define FW_DYNAMIC_INFO_MAGIC_VALUE 0x4942534f diff --git a/include/hw/riscv/shakti_c.h b/include/hw/riscv/shakti_c.h index 50a2b79086..daf0aae13f 100644 --- a/include/hw/riscv/shakti_c.h +++ b/include/hw/riscv/shakti_c.h @@ -16,8 +16,8 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef HW_SHAKTI_H -#define HW_SHAKTI_H +#ifndef HW_SHAKTI_C_H +#define HW_SHAKTI_C_H #include "hw/riscv/riscv_hart.h" #include "hw/boards.h" diff --git a/include/hw/rtc/m48t59.h b/include/hw/rtc/m48t59.h index d9b45eb161..c14937476c 100644 --- a/include/hw/rtc/m48t59.h +++ b/include/hw/rtc/m48t59.h @@ -47,4 +47,4 @@ struct NvramClass { void (*toggle_lock)(Nvram *obj, int lock); }; -#endif /* HW_M48T59_H */ +#endif /* HW_RTC_M48T59_H */ diff --git a/include/hw/rtc/mc146818rtc.h b/include/hw/rtc/mc146818rtc.h index deef93f89a..33d85753c0 100644 --- a/include/hw/rtc/mc146818rtc.h +++ b/include/hw/rtc/mc146818rtc.h @@ -56,4 +56,4 @@ ISADevice *mc146818_rtc_init(ISABus *bus, int base_year, void rtc_set_memory(ISADevice *dev, int addr, int val); int rtc_get_memory(ISADevice *dev, int addr); -#endif /* MC146818RTC_H */ +#endif /* HW_RTC_MC146818RTC_H */ diff --git a/include/hw/rtc/sun4v-rtc.h b/include/hw/rtc/sun4v-rtc.h index fd868f6ed2..fc54dfcba4 100644 --- a/include/hw/rtc/sun4v-rtc.h +++ b/include/hw/rtc/sun4v-rtc.h @@ -9,8 +9,8 @@ * version. */ -#ifndef HW_RTC_SUN4V -#define HW_RTC_SUN4V +#ifndef HW_RTC_SUN4V_RTC_H +#define HW_RTC_SUN4V_RTC_H #include "exec/hwaddr.h" diff --git a/include/hw/rtc/xlnx-zynqmp-rtc.h b/include/hw/rtc/xlnx-zynqmp-rtc.h index 5f1ad0a946..f0c6a2d78a 100644 --- a/include/hw/rtc/xlnx-zynqmp-rtc.h +++ b/include/hw/rtc/xlnx-zynqmp-rtc.h @@ -24,8 +24,8 @@ * THE SOFTWARE. */ -#ifndef HW_RTC_XLNX_ZYNQMP_H -#define HW_RTC_XLNX_ZYNQMP_H +#ifndef HW_RTC_XLNX_ZYNQMP_RTC_H +#define HW_RTC_XLNX_ZYNQMP_RTC_H #include "hw/register.h" #include "hw/sysbus.h" diff --git a/include/hw/rx/rx62n.h b/include/hw/rx/rx62n.h index 3ed80dba0d..73ceeb58e5 100644 --- a/include/hw/rx/rx62n.h +++ b/include/hw/rx/rx62n.h @@ -21,8 +21,8 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef HW_RX_RX62N_MCU_H -#define HW_RX_RX62N_MCU_H +#ifndef HW_RX_RX62N_H +#define HW_RX_RX62N_H #include "target/rx/cpu.h" #include "hw/intc/rx_icu.h" diff --git a/include/hw/s390x/s390-pci-clp.h b/include/hw/s390x/s390-pci-clp.h index cc8c8662b8..03b7f9ba5f 100644 --- a/include/hw/s390x/s390-pci-clp.h +++ b/include/hw/s390x/s390-pci-clp.h @@ -9,8 +9,8 @@ * directory. */ -#ifndef HW_S390_PCI_CLP -#define HW_S390_PCI_CLP +#ifndef HW_S390_PCI_CLP_H +#define HW_S390_PCI_CLP_H /* CLP common request & response block size */ #define CLP_BLK_SIZE 4096 diff --git a/include/hw/sensor/emc141x_regs.h b/include/hw/sensor/emc141x_regs.h index 0560fb7c5c..e509a43d55 100644 --- a/include/hw/sensor/emc141x_regs.h +++ b/include/hw/sensor/emc141x_regs.h @@ -9,8 +9,8 @@ * later. See the COPYING file in the top-level directory. */ -#ifndef TMP105_REGS_H -#define TMP105_REGS_H +#ifndef EMC141X_REGS_H +#define EMC141X_REGS_H #define EMC1413_DEVICE_ID 0x21 #define EMC1414_DEVICE_ID 0x25 diff --git a/include/hw/ssi/xlnx-versal-ospi.h b/include/hw/ssi/xlnx-versal-ospi.h index 14d1263497..5d131d351d 100644 --- a/include/hw/ssi/xlnx-versal-ospi.h +++ b/include/hw/ssi/xlnx-versal-ospi.h @@ -49,8 +49,8 @@ * + Property "indac-write-disabled": Disable indirect access writes. */ -#ifndef XILINX_VERSAL_OSPI_H -#define XILINX_VERSAL_OSPI_H +#ifndef XLNX_VERSAL_OSPI_H +#define XLNX_VERSAL_OSPI_H #include "hw/register.h" #include "hw/ssi/ssi.h" @@ -108,4 +108,4 @@ struct XlnxVersalOspi { uint8_t stig_membank[512]; }; -#endif /* XILINX_VERSAL_OSPI_H */ +#endif /* XLNX_VERSAL_OSPI_H */ diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h index bd3097d746..a8f605beeb 100644 --- a/include/hw/timer/bcm2835_systmr.h +++ b/include/hw/timer/bcm2835_systmr.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef BCM2835_SYSTIMER_H -#define BCM2835_SYSTIMER_H +#ifndef BCM2835_SYSTMR_H +#define BCM2835_SYSTMR_H #include "hw/sysbus.h" #include "hw/irq.h" diff --git a/include/hw/tricore/tc27x_soc.h b/include/hw/tricore/tc27x_soc.h index 6a7e5b54f5..dd3a7485c8 100644 --- a/include/hw/tricore/tc27x_soc.h +++ b/include/hw/tricore/tc27x_soc.h @@ -18,8 +18,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef TC27X_SoC_H -#define TC27X_SoC_H +#ifndef TC27X_SOC_H +#define TC27X_SOC_H #include "hw/sysbus.h" #include "target/tricore/cpu.h" diff --git a/include/hw/tricore/tricore_testdevice.h b/include/hw/tricore/tricore_testdevice.h index 2c56c51bcb..1e2b8942ac 100644 --- a/include/hw/tricore/tricore_testdevice.h +++ b/include/hw/tricore/tricore_testdevice.h @@ -15,9 +15,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ - -#ifndef HW_TRICORE_TESTDEV_H -#define HW_TRICORE_TESTDEV_H +#ifndef HW_TRICORE_TESTDEVICE_H +#define HW_TRICORE_TESTDEVICE_H #include "hw/sysbus.h" #include "hw/hw.h" diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h index a7eb531485..4015c1d691 100644 --- a/include/hw/usb/dwc2-regs.h +++ b/include/hw/usb/dwc2-regs.h @@ -39,8 +39,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef DWC2_HW_H -#define DWC2_HW_H +#ifndef DWC2_REGS_H +#define DWC2_REGS_H #define HSOTG_REG(x) (x) @@ -896,4 +896,4 @@ struct dwc2_dma_desc { #define MAX_DMA_DESC_NUM_GENERIC 64 #define MAX_DMA_DESC_NUM_HS_ISOC 256 -#endif /* __DWC2_HW_H__ */ +#endif /* DWC2_REGS_H */ diff --git a/include/hw/usb/hcd-musb.h b/include/hw/usb/hcd-musb.h index c874b9f292..f30a26f7f4 100644 --- a/include/hw/usb/hcd-musb.h +++ b/include/hw/usb/hcd-musb.h @@ -10,8 +10,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef HW_USB_MUSB_H -#define HW_USB_MUSB_H +#ifndef HW_USB_HCD_MUSB_H +#define HW_USB_HCD_MUSB_H enum musb_irq_source_e { musb_irq_suspend = 0, diff --git a/include/hw/usb/xlnx-usb-subsystem.h b/include/hw/usb/xlnx-usb-subsystem.h index 999e423951..5b730abd84 100644 --- a/include/hw/usb/xlnx-usb-subsystem.h +++ b/include/hw/usb/xlnx-usb-subsystem.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef XLNX_VERSAL_USB_SUBSYSTEM_H -#define XLNX_VERSAL_USB_SUBSYSTEM_H +#ifndef XLNX_USB_SUBSYSTEM_H +#define XLNX_USB_SUBSYSTEM_H #include "hw/usb/xlnx-versal-usb2-ctrl-regs.h" #include "hw/usb/hcd-dwc3.h" diff --git a/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h b/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h index b76dce0419..633bf3013a 100644 --- a/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h +++ b/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h @@ -23,8 +23,8 @@ * THE SOFTWARE. */ -#ifndef XLNX_USB2_REGS_H -#define XLNX_USB2_REGS_H +#ifndef XLNX_VERSAL_USB2_CTRL_REGS_H +#define XLNX_VERSAL_USB2_CTRL_REGS_H #define TYPE_XILINX_VERSAL_USB2_CTRL_REGS "xlnx.versal-usb2-ctrl-regs" diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h index 0d62834c25..94c3aaa84e 100644 --- a/include/hw/virtio/vhost-user-fs.h +++ b/include/hw/virtio/vhost-user-fs.h @@ -11,8 +11,8 @@ * top-level directory. */ -#ifndef _QEMU_VHOST_USER_FS_H -#define _QEMU_VHOST_USER_FS_H +#ifndef QEMU_VHOST_USER_FS_H +#define QEMU_VHOST_USER_FS_H #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" @@ -44,4 +44,4 @@ struct VHostUserFS { /*< public >*/ }; -#endif /* _QEMU_VHOST_USER_FS_H */ +#endif /* QEMU_VHOST_USER_FS_H */ diff --git a/include/hw/virtio/vhost-user-i2c.h b/include/hw/virtio/vhost-user-i2c.h index d8372f3b43..0f7acd40e3 100644 --- a/include/hw/virtio/vhost-user-i2c.h +++ b/include/hw/virtio/vhost-user-i2c.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _QEMU_VHOST_USER_I2C_H -#define _QEMU_VHOST_USER_I2C_H +#ifndef QEMU_VHOST_USER_I2C_H +#define QEMU_VHOST_USER_I2C_H #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" @@ -28,4 +28,4 @@ struct VHostUserI2C { /* Virtio Feature bits */ #define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 -#endif /* _QEMU_VHOST_USER_I2C_H */ +#endif /* QEMU_VHOST_USER_I2C_H */ diff --git a/include/hw/virtio/vhost-user-rng.h b/include/hw/virtio/vhost-user-rng.h index 071539996d..ddd9f01eea 100644 --- a/include/hw/virtio/vhost-user-rng.h +++ b/include/hw/virtio/vhost-user-rng.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _QEMU_VHOST_USER_RNG_H -#define _QEMU_VHOST_USER_RNG_H +#ifndef QEMU_VHOST_USER_RNG_H +#define QEMU_VHOST_USER_RNG_H #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" @@ -30,4 +30,4 @@ struct VHostUserRNG { /*< public >*/ }; -#endif /* _QEMU_VHOST_USER_RNG_H */ +#endif /* QEMU_VHOST_USER_RNG_H */ diff --git a/include/hw/virtio/vhost-user-vsock.h b/include/hw/virtio/vhost-user-vsock.h index 4cfd558245..67aa46c952 100644 --- a/include/hw/virtio/vhost-user-vsock.h +++ b/include/hw/virtio/vhost-user-vsock.h @@ -8,8 +8,8 @@ * top-level directory. */ -#ifndef _QEMU_VHOST_USER_VSOCK_H -#define _QEMU_VHOST_USER_VSOCK_H +#ifndef QEMU_VHOST_USER_VSOCK_H +#define QEMU_VHOST_USER_VSOCK_H #include "hw/virtio/vhost-vsock-common.h" #include "hw/virtio/vhost-user.h" @@ -33,4 +33,4 @@ struct VHostUserVSock { /*< public >*/ }; -#endif /* _QEMU_VHOST_USER_VSOCK_H */ +#endif /* QEMU_VHOST_USER_VSOCK_H */ diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index e44a41bb70..c6e693cd3f 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -11,20 +11,61 @@ #include "chardev/char-fe.h" #include "hw/virtio/virtio.h" +/** + * VhostUserHostNotifier - notifier information for one queue + * @rcu: rcu_head for cleanup + * @mr: memory region of notifier + * @addr: current mapped address + * @unmap_addr: address to be un-mapped + * @idx: virtioqueue index + * + * The VhostUserHostNotifier entries are re-used. When an old mapping + * is to be released it is moved to @unmap_addr and @addr is replaced. + * Once the RCU process has completed the unmap @unmap_addr is + * cleared. + */ typedef struct VhostUserHostNotifier { struct rcu_head rcu; MemoryRegion mr; void *addr; void *unmap_addr; + int idx; } VhostUserHostNotifier; +/** + * VhostUserState - shared state for all vhost-user devices + * @chr: the character backend for the socket + * @notifiers: GPtrArray of @VhostUserHostnotifier + * @memory_slots: + */ typedef struct VhostUserState { CharBackend *chr; - VhostUserHostNotifier notifier[VIRTIO_QUEUE_MAX]; + GPtrArray *notifiers; int memory_slots; + bool supports_config; } VhostUserState; +/** + * vhost_user_init() - initialise shared vhost_user state + * @user: allocated area for storing shared state + * @chr: the chardev for the vhost socket + * @errp: error handle + * + * User can either directly g_new() space for the state or embed + * VhostUserState in their larger device structure and just point to + * it. + * + * Return: true on success, false on error while setting errp. + */ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp); + +/** + * vhost_user_cleanup() - cleanup state + * @user: ptr to use state + * + * Cleans up shared state and notifiers, callee is responsible for + * freeing the @VhostUserState memory itself. + */ void vhost_user_cleanup(VhostUserState *user); #endif diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h index d8b565b4da..93c782101d 100644 --- a/include/hw/virtio/vhost-vsock-common.h +++ b/include/hw/virtio/vhost-vsock-common.h @@ -8,8 +8,8 @@ * top-level directory. */ -#ifndef _QEMU_VHOST_VSOCK_COMMON_H -#define _QEMU_VHOST_VSOCK_COMMON_H +#ifndef QEMU_VHOST_VSOCK_COMMON_H +#define QEMU_VHOST_VSOCK_COMMON_H #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" @@ -44,9 +44,9 @@ int vhost_vsock_common_start(VirtIODevice *vdev); void vhost_vsock_common_stop(VirtIODevice *vdev); int vhost_vsock_common_pre_save(void *opaque); int vhost_vsock_common_post_load(void *opaque, int version_id); -void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name); +void vhost_vsock_common_realize(VirtIODevice *vdev); void vhost_vsock_common_unrealize(VirtIODevice *vdev); uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features, Error **errp); -#endif /* _QEMU_VHOST_VSOCK_COMMON_H */ +#endif /* QEMU_VHOST_VSOCK_COMMON_H */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 58a73e7b7a..b291fe4e24 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -61,6 +61,12 @@ typedef struct VhostDevConfigOps { } VhostDevConfigOps; struct vhost_memory; + +/** + * struct vhost_dev - common vhost_dev structure + * @vhost_ops: backend specific ops + * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier) + */ struct vhost_dev { VirtIODevice *vdev; MemoryListener memory_listener; @@ -108,15 +114,129 @@ struct vhost_net { NetClientState *nc; }; +/** + * vhost_dev_init() - initialise the vhost interface + * @hdev: the common vhost_dev structure + * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa) + * @backend_type: type of backend + * @busyloop_timeout: timeout for polling virtqueue + * @errp: error handle + * + * The initialisation of the vhost device will trigger the + * initialisation of the backend and potentially capability + * negotiation of backend interface. Configuration of the VirtIO + * itself won't happen until the interface is started. + * + * Return: 0 on success, non-zero on error while setting errp. + */ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout, Error **errp); + +/** + * vhost_dev_cleanup() - tear down and cleanup vhost interface + * @hdev: the common vhost_dev structure + */ void vhost_dev_cleanup(struct vhost_dev *hdev); -int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * vhost_dev_enable_notifiers() - enable event notifiers + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Enable notifications directly to the vhost device rather than being + * triggered by QEMU itself. Notifications should be enabled before + * the vhost device is started via @vhost_dev_start. + * + * Return: 0 on success, < 0 on error. + */ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * vhost_dev_disable_notifiers - disable event notifications + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Disable direct notifications to vhost device. + */ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); +/** + * vhost_dev_start() - start the vhost device + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Starts the vhost device. From this point VirtIO feature negotiation + * can start and the device can start processing VirtIO transactions. + * + * Return: 0 on success, < 0 on error. + */ +int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * vhost_dev_stop() - stop the vhost device + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Stop the vhost device. After the device is stopped the notifiers + * can be disabled (@vhost_dev_disable_notifiers) and the device can + * be torn down (@vhost_dev_cleanup). + */ +void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * DOC: vhost device configuration handling + * + * The VirtIO device configuration space is used for rarely changing + * or initialisation time parameters. The configuration can be updated + * by either the guest driver or the device itself. If the device can + * change the configuration over time the vhost handler should + * register a @VhostDevConfigOps structure with + * @vhost_dev_set_config_notifier so the guest can be notified. Some + * devices register a handler anyway and will signal an error if an + * unexpected config change happens. + */ + +/** + * vhost_dev_get_config() - fetch device configuration + * @hdev: common vhost_dev_structure + * @config: pointer to device appropriate config structure + * @config_len: size of device appropriate config structure + * + * Return: 0 on success, < 0 on error while setting errp + */ +int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, + uint32_t config_len, Error **errp); + +/** + * vhost_dev_set_config() - set device configuration + * @hdev: common vhost_dev_structure + * @data: pointer to data to set + * @offset: offset into configuration space + * @size: length of set + * @flags: @VhostSetConfigType flags + * + * By use of @offset/@size a subset of the configuration space can be + * written to. The @flags are used to indicate if it is a normal + * transaction or related to migration. + * + * Return: 0 on success, non-zero on error + */ +int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags); + +/** + * vhost_dev_set_config_notifier() - register VhostDevConfigOps + * @hdev: common vhost_dev_structure + * @ops: notifier ops + * + * If the device is expected to change configuration a notifier can be + * setup to handle the case. + */ +void vhost_dev_set_config_notifier(struct vhost_dev *dev, + const VhostDevConfigOps *ops); + + /* Test and clear masked event pending status. * Should be called after unmask to avoid losing events. */ @@ -136,14 +256,6 @@ int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); -int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, - uint32_t config_len, Error **errp); -int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, - uint32_t offset, uint32_t size, uint32_t flags); -/* notifier callback in case vhost device config space changed - */ -void vhost_dev_set_config_notifier(struct vhost_dev *dev, - const VhostDevConfigOps *ops); void vhost_dev_reset_inflight(struct vhost_inflight *inflight); void vhost_dev_free_inflight(struct vhost_inflight *inflight); diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 2179b75703..afff9e158e 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -22,6 +22,7 @@ #include "sysemu/vhost-user-backend.h" #include "standard-headers/linux/virtio_gpu.h" +#include "standard-headers/linux/virtio_ids.h" #include "qom/object.h" #define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base" @@ -37,8 +38,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL) #define TYPE_VHOST_USER_GPU "vhost-user-gpu" OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU) -#define VIRTIO_ID_GPU 16 - struct virtio_gpu_simple_resource { uint32_t resource_id; uint32_t width; diff --git a/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 2446dcd9ae..2446dcd9ae 100644 --- a/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index b62a35fdca..db1c0ddf6b 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -22,6 +22,7 @@ #include "standard-headers/linux/virtio_config.h" #include "standard-headers/linux/virtio_ring.h" #include "qom/object.h" +#include "hw/virtio/vhost.h" /* A guest should never accept this. It implies negotiation is broken. */ #define VIRTIO_F_BAD_FEATURE 30 @@ -102,6 +103,7 @@ struct VirtIODevice bool started; bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ bool disable_legacy_check; + bool vhost_started; VMChangeStateEntry *vmstate; char *bus_name; uint8_t device_endian; @@ -160,13 +162,14 @@ struct VirtioDeviceClass { int (*post_load)(VirtIODevice *vdev); const VMStateDescription *vmsd; bool (*primary_unplug_pending)(void *opaque); + struct vhost_dev *(*get_vhost)(VirtIODevice *vdev); }; void virtio_instance_init_common(Object *proxy_obj, void *data, size_t vdev_size, const char *vdev_name); -void virtio_init(VirtIODevice *vdev, const char *name, - uint16_t device_id, size_t config_size); +void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size); + void virtio_cleanup(VirtIODevice *vdev); void virtio_error(VirtIODevice *vdev, const char *fmt, ...) G_GNUC_PRINTF(2, 3); diff --git a/include/hw/watchdog/wdt_imx2.h b/include/hw/watchdog/wdt_imx2.h index 023d83f48f..600a552d2e 100644 --- a/include/hw/watchdog/wdt_imx2.h +++ b/include/hw/watchdog/wdt_imx2.h @@ -9,8 +9,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef IMX2_WDT_H -#define IMX2_WDT_H +#ifndef WDT_IMX2_H +#define WDT_IMX2_H #include "qemu/bitops.h" #include "hw/sysbus.h" @@ -88,4 +88,4 @@ struct IMX2WdtState { bool wcr_wdt_locked; /* affects WDT (never cleared) */ }; -#endif /* IMX2_WDT_H */ +#endif /* WDT_IMX2_H */ diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h index e747e63514..513c428fe4 100644 --- a/include/io/channel-socket.h +++ b/include/io/channel-socket.h @@ -47,6 +47,8 @@ struct QIOChannelSocket { socklen_t localAddrLen; struct sockaddr_storage remoteAddr; socklen_t remoteAddrLen; + ssize_t zero_copy_queued; + ssize_t zero_copy_sent; }; diff --git a/include/io/channel.h b/include/io/channel.h index 88988979f8..c680ee7480 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, #define QIO_CHANNEL_ERR_BLOCK -2 +#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 + typedef enum QIOChannelFeature QIOChannelFeature; enum QIOChannelFeature { QIO_CHANNEL_FEATURE_FD_PASS, QIO_CHANNEL_FEATURE_SHUTDOWN, QIO_CHANNEL_FEATURE_LISTEN, + QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, }; @@ -104,6 +107,7 @@ struct QIOChannelClass { size_t niov, int *fds, size_t nfds, + int flags, Error **errp); ssize_t (*io_readv)(QIOChannel *ioc, const struct iovec *iov, @@ -136,6 +140,8 @@ struct QIOChannelClass { IOHandler *io_read, IOHandler *io_write, void *opaque); + int (*io_flush)(QIOChannel *ioc, + Error **errp); }; /* General I/O handling functions */ @@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, * @niov: the length of the @iov array * @fds: an array of file handles to send * @nfds: number of file handles in @fds + * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) * @errp: pointer to a NULL-initialized error object * * Write data to the IO channel, reading it from the @@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp); /** @@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, * @niov: the length of the @iov array * @fds: an array of file handles to send * @nfds: number of file handles in @fds + * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) * @errp: pointer to a NULL-initialized error object * * @@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, * to be written, yielding from the current coroutine * if required. * + * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, + * instead of waiting for all requested data to be written, + * this function will wait until it's all queued for writing. + * In this case, if the buffer gets changed between queueing and + * sending, the updated buffer will be sent. If this is not a + * desired behavior, it's suggested to call qio_channel_flush() + * before reusing the buffer. + * * Returns: 0 if all bytes were written, or -1 on error */ @@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, const struct iovec *iov, size_t niov, int *fds, size_t nfds, - Error **errp); + int flags, Error **errp); + +/** + * qio_channel_flush: + * @ioc: the channel object + * @errp: pointer to a NULL-initialized error object + * + * Will block until every packet queued with + * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY + * is sent, or return in case of any error. + * + * If not implemented, acts as a no-op, and returns 0. + * + * Returns -1 if any error is found, + * 1 if every send failed to use zero copy. + * 0 otherwise. + */ + +int qio_channel_flush(QIOChannel *ioc, + Error **errp); #endif /* QIO_CHANNEL_H */ diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index 284571badb..d1548d5b11 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -208,17 +208,19 @@ void qemu_co_queue_init(CoQueue *queue); void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock); /** - * Removes the next coroutine from the CoQueue, and wake it up. + * Removes the next coroutine from the CoQueue, and queue it to run after + * the currently-running coroutine yields. * Returns true if a coroutine was removed, false if the queue is empty. - * OK to run from coroutine and non-coroutine context. + * Used from coroutine context, use qemu_co_enter_next outside. */ -bool qemu_co_queue_next(CoQueue *queue); +bool coroutine_fn qemu_co_queue_next(CoQueue *queue); /** - * Empties the CoQueue; all coroutines are woken up. - * OK to run from coroutine and non-coroutine context. + * Empties the CoQueue and queues the coroutine to run after + * the currently-running coroutine yields. + * Used from coroutine context, use qemu_co_enter_all outside. */ -void qemu_co_queue_restart_all(CoQueue *queue); +void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue); /** * Removes the next coroutine from the CoQueue, and wake it up. Unlike @@ -234,6 +236,19 @@ void qemu_co_queue_restart_all(CoQueue *queue); bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock); /** + * Empties the CoQueue, waking the waiting coroutine one at a time. Unlike + * qemu_co_queue_all, this function releases the lock during aio_co_wake + * because it is meant to be used outside coroutine context; in that case, the + * coroutine is entered immediately, before qemu_co_enter_all returns. + * + * If used in coroutine context, qemu_co_enter_all is equivalent to + * qemu_co_queue_all. + */ +#define qemu_co_enter_all(queue, lock) \ + qemu_co_enter_all_impl(queue, QEMU_MAKE_LOCKABLE(lock)) +void qemu_co_enter_all_impl(CoQueue *queue, QemuLockable *lock); + +/** * Checks if the CoQueue is empty. */ bool qemu_co_queue_empty(CoQueue *queue); @@ -334,12 +349,12 @@ void coroutine_fn yield_until_fd_readable(int fd); /** * Increase coroutine pool size */ -void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size); +void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); /** - * Devcrease coroutine pool size + * Decrease coroutine pool size */ -void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size); +void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); #include "qemu/lockable.h" diff --git a/include/qemu/cpu-float.h b/include/qemu/cpu-float.h index 911099499f..a26b9faf68 100644 --- a/include/qemu/cpu-float.h +++ b/include/qemu/cpu-float.h @@ -1,5 +1,5 @@ -#ifndef QEMU_CPU_FLOAT_H_ -#define QEMU_CPU_FLOAT_H_ +#ifndef QEMU_CPU_FLOAT_H +#define QEMU_CPU_FLOAT_H #include "fpu/softfloat-types.h" @@ -61,4 +61,4 @@ typedef union { #endif } CPU_QuadU; -#endif /* QEMU_CPU_FLOAT_H_ */ +#endif /* QEMU_CPU_FLOAT_H */ diff --git a/include/qemu/crc-ccitt.h b/include/qemu/crc-ccitt.h index 06ee55b159..d6eb49146d 100644 --- a/include/qemu/crc-ccitt.h +++ b/include/qemu/crc-ccitt.h @@ -11,8 +11,8 @@ * SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CRC_CCITT_H -#define _CRC_CCITT_H +#ifndef CRC_CCITT_H +#define CRC_CCITT_H extern uint16_t const crc_ccitt_table[256]; extern uint16_t const crc_ccitt_false_table[256]; @@ -30,4 +30,4 @@ static inline uint16_t crc_ccitt_false_byte(uint16_t crc, const uint8_t c) return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c]; } -#endif /* _CRC_CCITT_H */ +#endif /* CRC_CCITT_H */ diff --git a/include/qemu/help-texts.h b/include/qemu/help-texts.h index ba32cc8b1f..4f265fed8d 100644 --- a/include/qemu/help-texts.h +++ b/include/qemu/help-texts.h @@ -1,5 +1,5 @@ -#ifndef QEMU_COMMON_H -#define QEMU_COMMON_H +#ifndef QEMU_HELP_TEXTS_H +#define QEMU_HELP_TEXTS_H /* Copyright string for -version arguments, About dialogs, etc */ #define QEMU_COPYRIGHT "Copyright (c) 2003-2022 " \ diff --git a/include/qemu/keyval.h b/include/qemu/keyval.h index 2d263286d7..1187b68303 100644 --- a/include/qemu/keyval.h +++ b/include/qemu/keyval.h @@ -2,8 +2,9 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ -#ifndef KEYVAL_H_ -#define KEYVAL_H_ + +#ifndef KEYVAL_H +#define KEYVAL_H QDict *keyval_parse_into(QDict *qdict, const char *params, const char *implied_key, bool *p_help, Error **errp); @@ -11,4 +12,4 @@ QDict *keyval_parse(const char *params, const char *implied_key, bool *help, Error **errp); void keyval_merge(QDict *old, const QDict *new, Error **errp); -#endif /* KEYVAL_H_ */ +#endif /* KEYVAL_H */ diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h index 0f59226727..8ad13c110c 100644 --- a/include/qemu/plugin-memory.h +++ b/include/qemu/plugin-memory.h @@ -37,4 +37,4 @@ struct qemu_plugin_hwaddr { bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, bool is_store, struct qemu_plugin_hwaddr *data); -#endif /* _PLUGIN_MEMORY_H_ */ +#endif /* PLUGIN_MEMORY_H */ diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h index 535ddbf0ae..d0e9d03adf 100644 --- a/include/qemu/qemu-plugin.h +++ b/include/qemu/qemu-plugin.h @@ -7,8 +7,9 @@ * * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef QEMU_PLUGIN_API_H -#define QEMU_PLUGIN_API_H + +#ifndef QEMU_QEMU_PLUGIN_H +#define QEMU_QEMU_PLUGIN_H #include <inttypes.h> #include <stdbool.h> @@ -624,4 +625,4 @@ uint64_t qemu_plugin_end_code(void); */ uint64_t qemu_plugin_entry_code(void); -#endif /* QEMU_PLUGIN_API_H */ +#endif /* QEMU_QEMU_PLUGIN_H */ diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h index 80cf920fba..3479a2a618 100644 --- a/include/qemu/selfmap.h +++ b/include/qemu/selfmap.h @@ -41,4 +41,4 @@ GSList *read_self_maps(void); */ void free_self_maps(GSList *info); -#endif /* _SELFMAP_H_ */ +#endif /* SELFMAP_H */ diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index b5e86b40ab..50790aee5a 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -278,7 +278,8 @@ #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 -#define KEY_DASHBOARD 204 /* AL Dashboard */ +#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ +#define KEY_DASHBOARD KEY_ALL_APPLICATIONS #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 @@ -612,6 +613,7 @@ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ +#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ @@ -660,6 +662,27 @@ /* Select an area of screen to be copied */ #define KEY_SELECTIVE_SCREENSHOT 0x27a +/* Move the focus to the next or previous user controllable element within a UI container */ +#define KEY_NEXT_ELEMENT 0x27b +#define KEY_PREVIOUS_ELEMENT 0x27c + +/* Toggle Autopilot engagement */ +#define KEY_AUTOPILOT_ENGAGE_TOGGLE 0x27d + +/* Shortcut Keys */ +#define KEY_MARK_WAYPOINT 0x27e +#define KEY_SOS 0x27f +#define KEY_NAV_CHART 0x280 +#define KEY_FISHING_CHART 0x281 +#define KEY_SINGLE_RANGE_RADAR 0x282 +#define KEY_DUAL_RANGE_RADAR 0x283 +#define KEY_RADAR_OVERLAY 0x284 +#define KEY_TRADITIONAL_SONAR 0x285 +#define KEY_CLEARVU_SONAR 0x286 +#define KEY_SIDEVU_SONAR 0x287 +#define KEY_NAV_INFO 0x288 +#define KEY_BRIGHTNESS_MENU 0x289 + /* * Some keyboards have keys which do not have a defined meaning, these keys * are intended to be programmed / bound to macros by the user. For most diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index 22e3a85f67..7acd8d4abc 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -81,6 +81,12 @@ #define VIRTIO_F_RING_PACKED 34 /* + * Inorder feature indicates that all buffers are used by the device + * in the same order in which they have been made available. + */ +#define VIRTIO_F_IN_ORDER 35 + +/* * This feature indicates that memory accesses by the driver and the * device are ordered in a way described by the platform. */ diff --git a/include/standard-headers/linux/virtio_crypto.h b/include/standard-headers/linux/virtio_crypto.h index 5ff0b4ee59..68066dafb6 100644 --- a/include/standard-headers/linux/virtio_crypto.h +++ b/include/standard-headers/linux/virtio_crypto.h @@ -37,6 +37,7 @@ #define VIRTIO_CRYPTO_SERVICE_HASH 1 #define VIRTIO_CRYPTO_SERVICE_MAC 2 #define VIRTIO_CRYPTO_SERVICE_AEAD 3 +#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4 #define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) @@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) +#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04) +#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05) uint32_t opcode; uint32_t algo; uint32_t flag; @@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req { uint8_t padding[32]; }; +struct virtio_crypto_rsa_session_para { +#define VIRTIO_CRYPTO_RSA_RAW_PADDING 0 +#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1 + uint32_t padding_algo; + +#define VIRTIO_CRYPTO_RSA_NO_HASH 0 +#define VIRTIO_CRYPTO_RSA_MD2 1 +#define VIRTIO_CRYPTO_RSA_MD3 2 +#define VIRTIO_CRYPTO_RSA_MD4 3 +#define VIRTIO_CRYPTO_RSA_MD5 4 +#define VIRTIO_CRYPTO_RSA_SHA1 5 +#define VIRTIO_CRYPTO_RSA_SHA256 6 +#define VIRTIO_CRYPTO_RSA_SHA384 7 +#define VIRTIO_CRYPTO_RSA_SHA512 8 +#define VIRTIO_CRYPTO_RSA_SHA224 9 + uint32_t hash_algo; +}; + +struct virtio_crypto_ecdsa_session_para { +#define VIRTIO_CRYPTO_CURVE_UNKNOWN 0 +#define VIRTIO_CRYPTO_CURVE_NIST_P192 1 +#define VIRTIO_CRYPTO_CURVE_NIST_P224 2 +#define VIRTIO_CRYPTO_CURVE_NIST_P256 3 +#define VIRTIO_CRYPTO_CURVE_NIST_P384 4 +#define VIRTIO_CRYPTO_CURVE_NIST_P521 5 + uint32_t curve_id; + uint32_t padding; +}; + +struct virtio_crypto_akcipher_session_para { +#define VIRTIO_CRYPTO_NO_AKCIPHER 0 +#define VIRTIO_CRYPTO_AKCIPHER_RSA 1 +#define VIRTIO_CRYPTO_AKCIPHER_DSA 2 +#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3 + uint32_t algo; + +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC 1 +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2 + uint32_t keytype; + uint32_t keylen; + + union { + struct virtio_crypto_rsa_session_para rsa; + struct virtio_crypto_ecdsa_session_para ecdsa; + } u; +}; + +struct virtio_crypto_akcipher_create_session_req { + struct virtio_crypto_akcipher_session_para para; + uint8_t padding[36]; +}; + struct virtio_crypto_alg_chain_session_para { #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 @@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req { mac_create_session; struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_akcipher_create_session_req + akcipher_create_session; struct virtio_crypto_destroy_session_req destroy_session; uint8_t padding[56]; @@ -266,6 +325,14 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) #define VIRTIO_CRYPTO_AEAD_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) +#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_SIGN \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) +#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03) uint32_t opcode; /* algo should be service-specific algorithms */ uint32_t algo; @@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req { uint8_t padding[32]; }; +struct virtio_crypto_akcipher_para { + uint32_t src_data_len; + uint32_t dst_data_len; +}; + +struct virtio_crypto_akcipher_data_req { + struct virtio_crypto_akcipher_para para; + uint8_t padding[40]; +}; + /* The request of the data virtqueue's packet */ struct virtio_crypto_op_data_req { struct virtio_crypto_op_header header; @@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_hash_data_req hash_req; struct virtio_crypto_mac_data_req mac_req; struct virtio_crypto_aead_data_req aead_req; + struct virtio_crypto_akcipher_data_req akcipher_req; uint8_t padding[48]; } u; }; @@ -408,6 +486,8 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_BADMSG 2 #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ +#define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ +#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) @@ -438,7 +518,7 @@ struct virtio_crypto_config { uint32_t max_cipher_key_len; /* Maximum length of authenticated key */ uint32_t max_auth_key_len; - uint32_t reserve; + uint32_t akcipher_algo; /* Maximum size of each crypto request's content */ uint64_t max_size; }; diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index 2e93a74679..415f0c91d7 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -10,8 +10,8 @@ * or later. See the COPYING.LIB file in the top-level directory. */ -#ifndef BLOCK_BACKEND_GS_H -#define BLOCK_BACKEND_GS_H +#ifndef BLOCK_BACKEND_GLOBAL_STATE_H +#define BLOCK_BACKEND_GLOBAL_STATE_H #include "block-backend-common.h" @@ -113,4 +113,4 @@ const BdrvChild *blk_root(BlockBackend *blk); int blk_make_empty(BlockBackend *blk, Error **errp); -#endif /* BLOCK_BACKEND_GS_H */ +#endif /* BLOCK_BACKEND_GLOBAL_STATE_H */ diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 360a408edf..b4030acd74 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -46,8 +46,6 @@ extern int alt_grab; extern int ctrl_grab; extern int graphic_rotate; extern int old_param; -extern int boot_menu; -extern bool boot_strict; extern uint8_t *boot_splash_filedata; extern bool enable_mlock; extern bool enable_cpu_pm; diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h index 121a156933..2ba22bd5fe 100644 --- a/include/tcg/tcg-ldst.h +++ b/include/tcg/tcg-ldst.h @@ -23,7 +23,7 @@ */ #ifndef TCG_LDST_H -#define TCG_LDST_H 1 +#define TCG_LDST_H #ifdef CONFIG_SOFTMMU diff --git a/include/ui/dbus-display.h b/include/ui/dbus-display.h index 88f153c237..7c9ec1a069 100644 --- a/include/ui/dbus-display.h +++ b/include/ui/dbus-display.h @@ -1,5 +1,5 @@ -#ifndef DBUS_DISPLAY_H_ -#define DBUS_DISPLAY_H_ +#ifndef DBUS_DISPLAY_H +#define DBUS_DISPLAY_H #include "qapi/error.h" #include "ui/dbus-module.h" @@ -14,4 +14,4 @@ static inline bool qemu_using_dbus_display(Error **errp) return true; } -#endif /* DBUS_DISPLAY_H_ */ +#endif /* DBUS_DISPLAY_H */ diff --git a/include/ui/dbus-module.h b/include/ui/dbus-module.h index ace4a17a5c..5442ee0bb6 100644 --- a/include/ui/dbus-module.h +++ b/include/ui/dbus-module.h @@ -1,5 +1,5 @@ -#ifndef DBUS_MODULE_H_ -#define DBUS_MODULE_H_ +#ifndef DBUS_MODULE_H +#define DBUS_MODULE_H struct QemuDBusDisplayOps { bool (*add_client)(int csock, Error **errp); @@ -8,4 +8,4 @@ struct QemuDBusDisplayOps { extern int using_dbus_display; extern struct QemuDBusDisplayOps qemu_dbus_display; -#endif /* DBUS_MODULE_H_*/ +#endif /* DBUS_MODULE_H */ diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h index 614cfacfa5..b4e53d3870 100644 --- a/include/user/syscall-trace.h +++ b/include/user/syscall-trace.h @@ -39,4 +39,4 @@ static inline void record_syscall_return(void *cpu, int num, abi_long ret) } -#endif /* _SYSCALL_TRACE_H_ */ +#endif /* SYSCALL_TRACE_H */ diff --git a/io/channel-buffer.c b/io/channel-buffer.c index baa4e2b089..bf52011be2 100644 --- a/io/channel-buffer.c +++ b/io/channel-buffer.c @@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); diff --git a/io/channel-command.c b/io/channel-command.c index 4a1f969aaa..9f2f4a1793 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -276,6 +276,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); diff --git a/io/channel-file.c b/io/channel-file.c index d146ace7db..b67687c2aa 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); diff --git a/io/channel-socket.c b/io/channel-socket.c index e531d7bd2a..dc9c165de1 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -25,6 +25,14 @@ #include "io/channel-watch.h" #include "trace.h" #include "qapi/clone-visitor.h" +#ifdef CONFIG_LINUX +#include <linux/errqueue.h> +#include <sys/socket.h> + +#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) +#define QEMU_MSG_ZEROCOPY +#endif +#endif #define SOCKET_MAX_FDS 16 @@ -54,6 +62,8 @@ qio_channel_socket_new(void) sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); sioc->fd = -1; + sioc->zero_copy_queued = 0; + sioc->zero_copy_sent = 0; ioc = QIO_CHANNEL(sioc); qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); @@ -153,6 +163,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, return -1; } +#ifdef QEMU_MSG_ZEROCOPY + int ret, v = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); + if (ret == 0) { + /* Zero copy available on host */ + qio_channel_set_feature(QIO_CHANNEL(ioc), + QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); + } +#endif + return 0; } @@ -524,6 +544,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); @@ -532,6 +553,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; size_t fdsize = sizeof(int) * nfds; struct cmsghdr *cmsg; + int sflags = 0; memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); @@ -556,15 +578,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, memcpy(CMSG_DATA(cmsg), fds, fdsize); } +#ifdef QEMU_MSG_ZEROCOPY + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + sflags = MSG_ZEROCOPY; + } +#endif + retry: - ret = sendmsg(sioc->fd, &msg, 0); + ret = sendmsg(sioc->fd, &msg, sflags); if (ret <= 0) { - if (errno == EAGAIN) { + switch (errno) { + case EAGAIN: return QIO_CHANNEL_ERR_BLOCK; - } - if (errno == EINTR) { + case EINTR: goto retry; +#ifdef QEMU_MSG_ZEROCOPY + case ENOBUFS: + if (sflags & MSG_ZEROCOPY) { + error_setg_errno(errp, errno, + "Process can't lock enough memory for using MSG_ZEROCOPY"); + return -1; + } + break; +#endif } + error_setg_errno(errp, errno, "Unable to write to socket"); return -1; @@ -619,6 +657,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); @@ -657,6 +696,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, } #endif /* WIN32 */ + +#ifdef QEMU_MSG_ZEROCOPY +static int qio_channel_socket_flush(QIOChannel *ioc, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + struct msghdr msg = {}; + struct sock_extended_err *serr; + struct cmsghdr *cm; + char control[CMSG_SPACE(sizeof(*serr))]; + int received; + int ret = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + memset(control, 0, sizeof(control)); + + while (sioc->zero_copy_sent < sioc->zero_copy_queued) { + received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); + if (received < 0) { + switch (errno) { + case EAGAIN: + /* Nothing on errqueue, wait until something is available */ + qio_channel_wait(ioc, G_IO_ERR); + continue; + case EINTR: + continue; + default: + error_setg_errno(errp, errno, + "Unable to read errqueue"); + return -1; + } + } + + cm = CMSG_FIRSTHDR(&msg); + if (cm->cmsg_level != SOL_IP && + cm->cmsg_type != IP_RECVERR) { + error_setg_errno(errp, EPROTOTYPE, + "Wrong cmsg in errqueue"); + return -1; + } + + serr = (void *) CMSG_DATA(cm); + if (serr->ee_errno != SO_EE_ORIGIN_NONE) { + error_setg_errno(errp, serr->ee_errno, + "Error on socket"); + return -1; + } + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + error_setg_errno(errp, serr->ee_origin, + "Error not from zero copy"); + return -1; + } + + /* No errors, count successfully finished sendmsg()*/ + sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; + + /* If any sendmsg() succeeded using zero copy, return 0 at the end */ + if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { + ret = 0; + } + } + + return ret; +} + +#endif /* QEMU_MSG_ZEROCOPY */ + static int qio_channel_socket_set_blocking(QIOChannel *ioc, bool enabled, @@ -787,6 +894,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, ioc_klass->io_set_delay = qio_channel_socket_set_delay; ioc_klass->io_create_watch = qio_channel_socket_create_watch; ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; +#ifdef QEMU_MSG_ZEROCOPY + ioc_klass->io_flush = qio_channel_socket_flush; +#endif } static const TypeInfo qio_channel_socket_info = { diff --git a/io/channel-tls.c b/io/channel-tls.c index 2ae1b92fc0..4ce890a538 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); diff --git a/io/channel-websock.c b/io/channel-websock.c index 55145a6a8c..9619906ac3 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); diff --git a/io/channel.c b/io/channel.c index e8b019dc36..0640941ac5 100644 --- a/io/channel.c +++ b/io/channel.c @@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); - if ((fds || nfds) && - !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + if (fds || nfds) { + if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + error_setg_errno(errp, EINVAL, + "Channel does not support file descriptor passing"); + return -1; + } + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + error_setg_errno(errp, EINVAL, + "Zero Copy does not support file descriptor passing"); + return -1; + } + } + + if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && + !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { error_setg_errno(errp, EINVAL, - "Channel does not support file descriptor passing"); + "Requested Zero Copy feature is not available"); return -1; } - return klass->io_writev(ioc, iov, niov, fds, nfds, errp); + return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); } @@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, size_t niov, Error **errp) { - return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); + return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); } int qio_channel_writev_full_all(QIOChannel *ioc, const struct iovec *iov, size_t niov, int *fds, size_t nfds, - Error **errp) + int flags, Error **errp) { int ret = -1; struct iovec *local_iov = g_new(struct iovec, niov); @@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, while (nlocal_iov > 0) { ssize_t len; - len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, - errp); + + len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, + nfds, flags, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { if (qemu_in_coroutine()) { qio_channel_yield(ioc, G_IO_OUT); @@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, size_t niov, Error **errp) { - return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); + return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); } @@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, Error **errp) { struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; - return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); + return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); } @@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, return klass->io_seek(ioc, offset, whence, errp); } +int qio_channel_flush(QIOChannel *ioc, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if (!klass->io_flush || + !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + return 0; + } + + return klass->io_flush(ioc, errp); +} + static void qio_channel_restart_read(void *opaque) { diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 3d2ce9912d..5c28a9737a 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) @@ -362,6 +367,7 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 #define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 @@ -411,6 +417,16 @@ struct kvm_arm_copy_mte_tags { #define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS #define KVM_PSCI_RET_DENIED PSCI_RET_DENIED +/* arm64-specific kvm_run::system_event flags */ +/* + * Reset caused by a PSCI v1.1 SYSTEM_RESET2 call. + * Valid only when the system event has a type of KVM_SYSTEM_EVENT_RESET. + */ +#define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0) + +/* run->fail_entry.hardware_entry_failure_reason codes. */ +#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h index 1567a3294c..6c1aa92a92 100644 --- a/linux-headers/asm-generic/mman-common.h +++ b/linux-headers/asm-generic/mman-common.h @@ -75,6 +75,8 @@ #define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ #define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ +#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h index 40b210c65a..1be428663c 100644 --- a/linux-headers/asm-mips/mman.h +++ b/linux-headers/asm-mips/mman.h @@ -101,6 +101,8 @@ #define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ #define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ +#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index d232feaae9..0d05d02ee4 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -445,7 +445,11 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + union { + __u64 flags; + __u64 data[16]; + }; } system_event; /* KVM_EXIT_S390_STSI */ struct { @@ -562,9 +566,12 @@ struct kvm_s390_mem_op { __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ union { - __u8 ar; /* the access register number */ + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + }; __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* should be set to 0 */ + __u8 reserved[32]; /* ignored */ }; }; /* types for kvm_s390_mem_op->op */ @@ -572,9 +579,12 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_LOGICAL_WRITE 1 #define KVM_S390_MEMOP_SIDA_READ 2 #define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) /* for KVM_INTERRUPT */ struct kvm_interrupt { @@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 +#define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 +#define KVM_CAP_DISABLE_QUIRKS2 213 +/* #define KVM_CAP_VM_TSC_CONTROL 214 */ +#define KVM_CAP_SYSTEM_EVENT_DATA 215 #ifdef KVM_CAP_IRQ_ROUTING @@ -1624,9 +1640,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; @@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +#define KVM_PMU_CAP_DISABLE (1 << 0) + /** * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h index a6772d508b..213b2a0f70 100644 --- a/linux-headers/linux/psci.h +++ b/linux-headers/linux/psci.h @@ -82,6 +82,10 @@ #define PSCI_0_2_TOS_UP_NO_MIGRATE 1 #define PSCI_0_2_TOS_MP 2 +/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */ +#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 +#define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 8479af5f4c..769b8379e4 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -32,7 +32,8 @@ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ UFFD_FEATURE_MINOR_HUGETLBFS | \ - UFFD_FEATURE_MINOR_SHMEM) + UFFD_FEATURE_MINOR_SHMEM | \ + UFFD_FEATURE_EXACT_ADDRESS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -189,6 +190,10 @@ struct uffdio_api { * * UFFD_FEATURE_MINOR_SHMEM indicates the same support as * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. + * + * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page + * faults would be provided and the offset within the page would not be + * masked. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -201,6 +206,7 @@ struct uffdio_api { #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) #define UFFD_FEATURE_MINOR_SHMEM (1<<10) +#define UFFD_FEATURE_EXACT_ADDRESS (1<<11) __u64 features; __u64 ioctls; diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index e680594f27..e9f7795c39 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -323,7 +323,7 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_GFX (1) #define VFIO_REGION_TYPE_CCW (2) -#define VFIO_REGION_TYPE_MIGRATION (3) +#define VFIO_REGION_TYPE_MIGRATION_DEPRECATED (3) /* sub-types for VFIO_REGION_TYPE_PCI_* */ @@ -405,225 +405,29 @@ struct vfio_region_gfx_edid { #define VFIO_REGION_SUBTYPE_CCW_CRW (3) /* sub-types for VFIO_REGION_TYPE_MIGRATION */ -#define VFIO_REGION_SUBTYPE_MIGRATION (1) - -/* - * The structure vfio_device_migration_info is placed at the 0th offset of - * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related - * migration information. Field accesses from this structure are only supported - * at their native width and alignment. Otherwise, the result is undefined and - * vendor drivers should return an error. - * - * device_state: (read/write) - * - The user application writes to this field to inform the vendor driver - * about the device state to be transitioned to. - * - The vendor driver should take the necessary actions to change the - * device state. After successful transition to a given state, the - * vendor driver should return success on write(device_state, state) - * system call. If the device state transition fails, the vendor driver - * should return an appropriate -errno for the fault condition. - * - On the user application side, if the device state transition fails, - * that is, if write(device_state, state) returns an error, read - * device_state again to determine the current state of the device from - * the vendor driver. - * - The vendor driver should return previous state of the device unless - * the vendor driver has encountered an internal error, in which case - * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. - * - The user application must use the device reset ioctl to recover the - * device from VFIO_DEVICE_STATE_ERROR state. If the device is - * indicated to be in a valid device state by reading device_state, the - * user application may attempt to transition the device to any valid - * state reachable from the current state or terminate itself. - * - * device_state consists of 3 bits: - * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, - * it indicates the _STOP state. When the device state is changed to - * _STOP, driver should stop the device before write() returns. - * - If bit 1 is set, it indicates the _SAVING state, which means that the - * driver should start gathering device state information that will be - * provided to the VFIO user application to save the device's state. - * - If bit 2 is set, it indicates the _RESUMING state, which means that - * the driver should prepare to resume the device. Data provided through - * the migration region should be used to resume the device. - * Bits 3 - 31 are reserved for future use. To preserve them, the user - * application should perform a read-modify-write operation on this - * field when modifying the specified bits. - * - * +------- _RESUMING - * |+------ _SAVING - * ||+----- _RUNNING - * ||| - * 000b => Device Stopped, not saving or resuming - * 001b => Device running, which is the default state - * 010b => Stop the device & save the device state, stop-and-copy state - * 011b => Device running and save the device state, pre-copy state - * 100b => Device stopped and the device state is resuming - * 101b => Invalid state - * 110b => Error state - * 111b => Invalid state - * - * State transitions: - * - * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP - * (100b) (001b) (011b) (010b) (000b) - * 0. Running or default state - * | - * - * 1. Normal Shutdown (optional) - * |------------------------------------->| - * - * 2. Save the state or suspend - * |------------------------->|---------->| - * - * 3. Save the state during live migration - * |----------->|------------>|---------->| - * - * 4. Resuming - * |<---------| - * - * 5. Resumed - * |--------->| - * - * 0. Default state of VFIO device is _RUNNING when the user application starts. - * 1. During normal shutdown of the user application, the user application may - * optionally change the VFIO device state from _RUNNING to _STOP. This - * transition is optional. The vendor driver must support this transition but - * must not require it. - * 2. When the user application saves state or suspends the application, the - * device state transitions from _RUNNING to stop-and-copy and then to _STOP. - * On state transition from _RUNNING to stop-and-copy, driver must stop the - * device, save the device state and send it to the application through the - * migration region. The sequence to be followed for such transition is given - * below. - * 3. In live migration of user application, the state transitions from _RUNNING - * to pre-copy, to stop-and-copy, and to _STOP. - * On state transition from _RUNNING to pre-copy, the driver should start - * gathering the device state while the application is still running and send - * the device state data to application through the migration region. - * On state transition from pre-copy to stop-and-copy, the driver must stop - * the device, save the device state and send it to the user application - * through the migration region. - * Vendor drivers must support the pre-copy state even for implementations - * where no data is provided to the user before the stop-and-copy state. The - * user must not be required to consume all migration data before the device - * transitions to a new state, including the stop-and-copy state. - * The sequence to be followed for above two transitions is given below. - * 4. To start the resuming phase, the device state should be transitioned from - * the _RUNNING to the _RESUMING state. - * In the _RESUMING state, the driver should use the device state data - * received through the migration region to resume the device. - * 5. After providing saved device data to the driver, the application should - * change the state from _RESUMING to _RUNNING. - * - * reserved: - * Reads on this field return zero and writes are ignored. - * - * pending_bytes: (read only) - * The number of pending bytes still to be migrated from the vendor driver. - * - * data_offset: (read only) - * The user application should read data_offset field from the migration - * region. The user application should read the device data from this - * offset within the migration region during the _SAVING state or write - * the device data during the _RESUMING state. See below for details of - * sequence to be followed. - * - * data_size: (read/write) - * The user application should read data_size to get the size in bytes of - * the data copied in the migration region during the _SAVING state and - * write the size in bytes of the data copied in the migration region - * during the _RESUMING state. - * - * The format of the migration region is as follows: - * ------------------------------------------------------------------ - * |vfio_device_migration_info| data section | - * | | /////////////////////////////// | - * ------------------------------------------------------------------ - * ^ ^ - * offset 0-trapped part data_offset - * - * The structure vfio_device_migration_info is always followed by the data - * section in the region, so data_offset will always be nonzero. The offset - * from where the data is copied is decided by the kernel driver. The data - * section can be trapped, mmapped, or partitioned, depending on how the kernel - * driver defines the data section. The data section partition can be defined - * as mapped by the sparse mmap capability. If mmapped, data_offset must be - * page aligned, whereas initial section which contains the - * vfio_device_migration_info structure, might not end at the offset, which is - * page aligned. The user is not required to access through mmap regardless - * of the capabilities of the region mmap. - * The vendor driver should determine whether and how to partition the data - * section. The vendor driver should return data_offset accordingly. - * - * The sequence to be followed while in pre-copy state and stop-and-copy state - * is as follows: - * a. Read pending_bytes, indicating the start of a new iteration to get device - * data. Repeated read on pending_bytes at this stage should have no side - * effects. - * If pending_bytes == 0, the user application should not iterate to get data - * for that device. - * If pending_bytes > 0, perform the following steps. - * b. Read data_offset, indicating that the vendor driver should make data - * available through the data section. The vendor driver should return this - * read operation only after data is available from (region + data_offset) - * to (region + data_offset + data_size). - * c. Read data_size, which is the amount of data in bytes available through - * the migration region. - * Read on data_offset and data_size should return the offset and size of - * the current buffer if the user application reads data_offset and - * data_size more than once here. - * d. Read data_size bytes of data from (region + data_offset) from the - * migration region. - * e. Process the data. - * f. Read pending_bytes, which indicates that the data from the previous - * iteration has been read. If pending_bytes > 0, go to step b. - * - * The user application can transition from the _SAVING|_RUNNING - * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the - * number of pending bytes. The user application should iterate in _SAVING - * (stop-and-copy) until pending_bytes is 0. - * - * The sequence to be followed while _RESUMING device state is as follows: - * While data for this device is available, repeat the following steps: - * a. Read data_offset from where the user application should write data. - * b. Write migration data starting at the migration region + data_offset for - * the length determined by data_size from the migration source. - * c. Write data_size, which indicates to the vendor driver that data is - * written in the migration region. Vendor driver must return this write - * operations on consuming data. Vendor driver should apply the - * user-provided migration region data to the device resume state. - * - * If an error occurs during the above sequences, the vendor driver can return - * an error code for next read() or write() operation, which will terminate the - * loop. The user application should then take the next necessary action, for - * example, failing migration or terminating the user application. - * - * For the user application, data is opaque. The user application should write - * data in the same order as the data is received and the data should be of - * same transaction size at the source. - */ +#define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1) struct vfio_device_migration_info { __u32 device_state; /* VFIO device state */ -#define VFIO_DEVICE_STATE_STOP (0) -#define VFIO_DEVICE_STATE_RUNNING (1 << 0) -#define VFIO_DEVICE_STATE_SAVING (1 << 1) -#define VFIO_DEVICE_STATE_RESUMING (1 << 2) -#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ - VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) +#define VFIO_DEVICE_STATE_V1_STOP (0) +#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_V1_RUNNING | \ + VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) #define VFIO_DEVICE_STATE_VALID(state) \ - (state & VFIO_DEVICE_STATE_RESUMING ? \ - (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + (state & VFIO_DEVICE_STATE_V1_RESUMING ? \ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1) #define VFIO_DEVICE_STATE_IS_ERROR(state) \ - ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING)) + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING)) #define VFIO_DEVICE_STATE_SET_ERROR(state) \ - ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) __u32 reserved; __u64 pending_bytes; @@ -1002,6 +806,186 @@ struct vfio_device_feature { */ #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) +/* + * Indicates the device can support the migration API through + * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and + * ERROR states are always supported. Support for additional states is + * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be + * set. + * + * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and + * RESUMING are supported. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P + * is supported in addition to the STOP_COPY states. + * + * Other combinations of flags have behavior to be defined in the future. + */ +struct vfio_device_feature_migration { + __aligned_u64 flags; +#define VFIO_MIGRATION_STOP_COPY (1 << 0) +#define VFIO_MIGRATION_P2P (1 << 1) +}; +#define VFIO_DEVICE_FEATURE_MIGRATION 1 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO + * device. The new state is supplied in device_state, see enum + * vfio_device_mig_state for details + * + * The kernel migration driver must fully transition the device to the new state + * value before the operation returns to the user. + * + * The kernel migration driver must not generate asynchronous device state + * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET + * ioctl as described above. + * + * If this function fails then current device_state may be the original + * operating state or some other state along the combination transition path. + * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt + * to return to the original state, or attempt to return to some other state + * such as RUNNING or STOP. + * + * If the new_state starts a new data transfer session then the FD associated + * with that session is returned in data_fd. The user is responsible to close + * this FD when it is finished. The user must consider the migration data stream + * carried over the FD to be opaque and must preserve the byte order of the + * stream. The user is not required to preserve buffer segmentation when writing + * the data stream during the RESUMING operation. + * + * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO + * device, data_fd will be -1. + */ +struct vfio_device_feature_mig_state { + __u32 device_state; /* From enum vfio_device_mig_state */ + __s32 data_fd; +}; +#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2 + +/* + * The device migration Finite State Machine is described by the enum + * vfio_device_mig_state. Some of the FSM arcs will create a migration data + * transfer session by returning a FD, in this case the migration data will + * flow over the FD using read() and write() as discussed below. + * + * There are 5 states to support VFIO_MIGRATION_STOP_COPY: + * RUNNING - The device is running normally + * STOP - The device does not change the internal or external state + * STOP_COPY - The device internal state can be read out + * RESUMING - The device is stopped and is loading a new internal state + * ERROR - The device has failed and must be reset + * + * And 1 optional state to support VFIO_MIGRATION_P2P: + * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * + * The FSM takes actions on the arcs between FSM states. The driver implements + * the following behavior for the FSM arcs: + * + * RUNNING_P2P -> STOP + * STOP_COPY -> STOP + * While in STOP the device must stop the operation of the device. The device + * must not generate interrupts, DMA, or any other change to external state. + * It must not change its internal state. When stopped the device and kernel + * migration driver must accept and respond to interaction to support external + * subsystems in the STOP state, for example PCI MSI-X and PCI config space. + * Failure by the user to restrict device access while in STOP must not result + * in error conditions outside the user context (ex. host system faults). + * + * The STOP_COPY arc will terminate a data transfer session. + * + * RESUMING -> STOP + * Leaving RESUMING terminates a data transfer session and indicates the + * device should complete processing of the data delivered by write(). The + * kernel migration driver should complete the incorporation of data written + * to the data transfer FD into the device internal state and perform + * final validity and consistency checking of the new device state. If the + * user provided data is found to be incomplete, inconsistent, or otherwise + * invalid, the migration driver must fail the SET_STATE ioctl and + * optionally go to the ERROR state as described below. + * + * While in STOP the device has the same behavior as other STOP states + * described above. + * + * To abort a RESUMING session the device must be reset. + * + * RUNNING_P2P -> RUNNING + * While in RUNNING the device is fully operational, the device may generate + * interrupts, DMA, respond to MMIO, all vfio device regions are functional, + * and the device may advance its internal state. + * + * RUNNING -> RUNNING_P2P + * STOP -> RUNNING_P2P + * While in RUNNING_P2P the device is partially running in the P2P quiescent + * state defined below. + * + * STOP -> STOP_COPY + * This arc begin the process of saving the device state and will return a + * new data_fd. + * + * While in the STOP_COPY state the device has the same behavior as STOP + * with the addition that the data transfers session continues to stream the + * migration state. End of stream on the FD indicates the entire device + * state has been transferred. + * + * The user should take steps to restrict access to vfio device regions while + * the device is in STOP_COPY or risk corruption of the device migration data + * stream. + * + * STOP -> RESUMING + * Entering the RESUMING state starts a process of restoring the device state + * and will return a new data_fd. The data stream fed into the data_fd should + * be taken from the data transfer output of a single FD during saving from + * a compatible device. The migration driver may alter/reset the internal + * device state for this arc if required to prepare the device to receive the + * migration data. + * + * any -> ERROR + * ERROR cannot be specified as a device state, however any transition request + * can be failed with an errno return and may then move the device_state into + * ERROR. In this case the device was unable to execute the requested arc and + * was also unable to restore the device to any valid device_state. + * To recover from ERROR VFIO_DEVICE_RESET must be used to return the + * device_state back to RUNNING. + * + * The optional peer to peer (P2P) quiescent state is intended to be a quiescent + * state for the device for the purposes of managing multiple devices within a + * user context where peer-to-peer DMA between devices may be active. The + * RUNNING_P2P states must prevent the device from initiating + * any new P2P DMA transactions. If the device can identify P2P transactions + * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration + * driver must complete any such outstanding operations prior to completing the + * FSM arc into a P2P state. For the purpose of specification the states + * behave as though the device was fully running if not supported. Like while in + * STOP or STOP_COPY the user must not touch the device, otherwise the state + * can be exited. + * + * The remaining possible transitions are interpreted as combinations of the + * above FSM arcs. As there are multiple paths through the FSM arcs the path + * should be selected based on the following rules: + * - Select the shortest path. + * Refer to vfio_mig_get_next_state() for the result of the algorithm. + * + * The automatic transit through the FSM arcs that make up the combination + * transition is invisible to the user. When working with combination arcs the + * user may see any step along the path in the device_state if SET_STATE + * fails. When handling these types of errors users should anticipate future + * revisions of this protocol using new states and those states becoming + * visible in this case. + * + * The optional states cannot be used with SET_STATE if the device does not + * support them. The user can discover if these states are supported by using + * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can + * avoid knowing about these optional states if the kernel driver supports them. + */ +enum vfio_device_mig_state { + VFIO_DEVICE_STATE_ERROR = 0, + VFIO_DEVICE_STATE_STOP = 1, + VFIO_DEVICE_STATE_RUNNING = 2, + VFIO_DEVICE_STATE_STOP_COPY = 3, + VFIO_DEVICE_STATE_RESUMING = 4, + VFIO_DEVICE_STATE_RUNNING_P2P = 5, +}; + /* -------- API for Type1 VFIO IOMMU -------- */ /** diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c998860d7b..5d99e7c242 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif diff --git a/linux-user/hexagon/target_signal.h b/linux-user/hexagon/target_signal.h index 193abac340..68fb71312e 100644 --- a/linux-user/hexagon/target_signal.h +++ b/linux-user/hexagon/target_signal.h @@ -22,4 +22,4 @@ #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 -#endif /* TARGET_SIGNAL_H */ +#endif /* HEXAGON_TARGET_SIGNAL_H */ diff --git a/meson.build b/meson.build index 864e97945f..52f6befc0f 100644 --- a/meson.build +++ b/meson.build @@ -515,12 +515,23 @@ if not get_option('linux_aio').auto() or have_block required: get_option('linux_aio'), kwargs: static_kwargs) endif + +linux_io_uring_test = ''' + #include <liburing.h> + #include <linux/errqueue.h> + + int main(void) { return 0; }''' + linux_io_uring = not_found if not get_option('linux_io_uring').auto() or have_block linux_io_uring = dependency('liburing', version: '>=0.3', required: get_option('linux_io_uring'), method: 'pkg-config', kwargs: static_kwargs) + if not cc.links(linux_io_uring_test) + linux_io_uring = not_found + endif endif + libnfs = not_found if not get_option('libnfs').auto() or have_block libnfs = dependency('libnfs', version: '>=1.9.3', @@ -569,6 +580,18 @@ if cocoa.found() and get_option('gtk').enabled() error('Cocoa and GTK+ cannot be enabled at the same time') endif +vmnet = dependency('appleframeworks', modules: 'vmnet', required: get_option('vmnet')) +if vmnet.found() and not cc.has_header_symbol('vmnet/vmnet.h', + 'VMNET_BRIDGED_MODE', + dependencies: vmnet) + vmnet = not_found + if get_option('vmnet').enabled() + error('vmnet.framework API is outdated') + else + warning('vmnet.framework API is outdated, disabling') + endif +endif + seccomp = not_found if not get_option('seccomp').auto() or have_system or have_tools seccomp = dependency('libseccomp', version: '>=2.3.0', @@ -1730,6 +1753,7 @@ config_host_data.set('CONFIG_VHOST_KERNEL', have_vhost_kernel) config_host_data.set('CONFIG_VHOST_USER', have_vhost_user) config_host_data.set('CONFIG_VHOST_CRYPTO', have_vhost_user_crypto) config_host_data.set('CONFIG_VHOST_VDPA', have_vhost_vdpa) +config_host_data.set('CONFIG_VMNET', vmnet.found()) config_host_data.set('CONFIG_VHOST_USER_BLK_SERVER', have_vhost_user_blk_server) config_host_data.set('CONFIG_PNG', png.found()) config_host_data.set('CONFIG_VNC', vnc.found()) @@ -1742,6 +1766,7 @@ config_host_data.set('CONFIG_KEYUTILS', keyutils.found()) config_host_data.set('CONFIG_GETTID', has_gettid) config_host_data.set('CONFIG_GNUTLS', gnutls.found()) config_host_data.set('CONFIG_GNUTLS_CRYPTO', gnutls_crypto.found()) +config_host_data.set('CONFIG_TASN1', tasn1.found()) config_host_data.set('CONFIG_GCRYPT', gcrypt.found()) config_host_data.set('CONFIG_NETTLE', nettle.found()) config_host_data.set('CONFIG_QEMU_PRIVATE_XTS', xts == 'private') @@ -2179,7 +2204,8 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \ have_pvrdma = get_option('pvrdma') \ .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \ - .require(cc.compiles(''' + .require(cc.compiles(gnu_source_prefix + ''' + #include <sys/mman.h> int main(void) { char buf = 0; @@ -2190,7 +2216,7 @@ have_pvrdma = get_option('pvrdma') \ }'''), error_message: 'PVRDMA requires mremap').allowed() if have_pvrdma - config_host_data.set('LEGACY_RDMA_REG_MR', not cc.compiles(''' + config_host_data.set('LEGACY_RDMA_REG_MR', not cc.links(''' #include <infiniband/verbs.h> int main(void) { @@ -2636,10 +2662,25 @@ if have_system slirp_opt = get_option('slirp') if slirp_opt in ['enabled', 'auto', 'system'] have_internal = fs.exists(meson.current_source_dir() / 'slirp/meson.build') + slirp_dep_required = (slirp_opt == 'system' or + slirp_opt == 'enabled' and not have_internal) slirp = dependency('slirp', kwargs: static_kwargs, - method: 'pkg-config', - required: slirp_opt == 'system' or - slirp_opt == 'enabled' and not have_internal) + method: 'pkg-config', version: '>=4.1.0', + required: slirp_dep_required) + # slirp <4.7 is incompatible with CFI support in QEMU. This is because + # it passes function pointers within libslirp as callbacks for timers. + # When using a system-wide shared libslirp, the type information for the + # callback is missing and the timer call produces a false positive with CFI. + # Do not use the "version" keyword argument to produce a better error. + # with control-flow integrity. + if get_option('cfi') and slirp.found() and slirp.version().version_compare('<4.7') + if slirp_dep_required + error('Control-Flow Integrity requires libslirp 4.7.') + else + warning('Control-Flow Integrity requires libslirp 4.7, not using system-wide libslirp.') + slirp = not_found + endif + endif if slirp.found() slirp_opt = 'system' elif have_internal @@ -2712,18 +2753,6 @@ if have_system endif endif -# For CFI, we need to compile slirp as a static library together with qemu. -# This is because we register slirp functions as callbacks for QEMU Timers. -# When using a system-wide shared libslirp, the type information for the -# callback is missing and the timer call produces a false positive with CFI. -# -# Now that slirp_opt has been defined, check if the selected slirp is compatible -# with control-flow integrity. -if get_option('cfi') and slirp_opt == 'system' - error('Control-Flow Integrity is not compatible with system-wide slirp.' \ - + ' Please configure with --enable-slirp=git') -endif - fdt = not_found if have_system fdt_opt = get_option('fdt') @@ -3881,7 +3910,8 @@ summary(summary_info, bool_yn: true, section: 'Crypto') # Libraries summary_info = {} if targetos == 'darwin' - summary_info += {'Cocoa support': cocoa} + summary_info += {'Cocoa support': cocoa} + summary_info += {'vmnet.framework support': vmnet} endif summary_info += {'SDL support': sdl} summary_info += {'SDL image support': sdl_image} diff --git a/meson_options.txt b/meson_options.txt index 29c6b90cec..7325cdad45 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -197,6 +197,8 @@ option('netmap', type : 'feature', value : 'auto', description: 'netmap network backend support') option('vde', type : 'feature', value : 'auto', description: 'vde network backend support') +option('vmnet', type : 'feature', value : 'auto', + description: 'vmnet.framework network backend support') option('virglrenderer', type : 'feature', value : 'auto', description: 'virgl rendering support') option('png', type : 'feature', value : 'auto', diff --git a/migration/channel.c b/migration/channel.c index c6a8dcf1d7..a162d00fea 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) trace_migration_set_incoming_channel( ioc, object_get_typename(OBJECT(ioc))); - if (s->parameters.tls_creds && - *s->parameters.tls_creds && + if (migrate_use_tls() && !object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { migration_tls_channel_process_incoming(s, ioc, &local_err); diff --git a/migration/migration.c b/migration/migration.c index 5a31b23bd6..31739b2af9 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -910,6 +910,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->multifd_zlib_level = s->parameters.multifd_zlib_level; params->has_multifd_zstd_level = true; params->multifd_zstd_level = s->parameters.multifd_zstd_level; +#ifdef CONFIG_LINUX + params->has_zero_copy_send = true; + params->zero_copy_send = s->parameters.zero_copy_send; +#endif params->has_xbzrle_cache_size = true; params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; params->has_max_postcopy_bandwidth = true; @@ -1493,7 +1497,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); return false; } - +#ifdef CONFIG_LINUX + if (params->zero_copy_send && + (!migrate_use_multifd() || + params->multifd_compression != MULTIFD_COMPRESSION_NONE || + (params->tls_creds && *params->tls_creds))) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); + return false; + } +#endif return true; } @@ -1567,6 +1580,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_multifd_compression) { dest->multifd_compression = params->multifd_compression; } +#ifdef CONFIG_LINUX + if (params->has_zero_copy_send) { + dest->zero_copy_send = params->zero_copy_send; + } +#endif if (params->has_xbzrle_cache_size) { dest->xbzrle_cache_size = params->xbzrle_cache_size; } @@ -1679,6 +1697,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_multifd_compression) { s->parameters.multifd_compression = params->multifd_compression; } +#ifdef CONFIG_LINUX + if (params->has_zero_copy_send) { + s->parameters.zero_copy_send = params->zero_copy_send; + } +#endif if (params->has_xbzrle_cache_size) { s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; xbzrle_cache_resize(params->xbzrle_cache_size, errp); @@ -2563,6 +2586,26 @@ int migrate_multifd_zstd_level(void) return s->parameters.multifd_zstd_level; } +#ifdef CONFIG_LINUX +bool migrate_use_zero_copy_send(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.zero_copy_send; +} +#endif + +int migrate_use_tls(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.tls_creds && *s->parameters.tls_creds; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -4206,6 +4249,10 @@ static Property migration_properties[] = { DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, parameters.multifd_zstd_level, DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +#ifdef CONFIG_LINUX + DEFINE_PROP_BOOL("zero_copy_send", MigrationState, + parameters.zero_copy_send, false), +#endif DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, parameters.xbzrle_cache_size, DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), @@ -4303,6 +4350,9 @@ static void migration_instance_init(Object *obj) params->has_multifd_compression = true; params->has_multifd_zlib_level = true; params->has_multifd_zstd_level = true; +#ifdef CONFIG_LINUX + params->has_zero_copy_send = true; +#endif params->has_xbzrle_cache_size = true; params->has_max_postcopy_bandwidth = true; params->has_max_cpu_throttle = true; diff --git a/migration/migration.h b/migration/migration.h index a863032b71..485d58b95f 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -375,6 +375,12 @@ MultiFDCompression migrate_multifd_compression(void); int migrate_multifd_zlib_level(void); int migrate_multifd_zstd_level(void); +#ifdef CONFIG_LINUX +bool migrate_use_zero_copy_send(void); +#else +#define migrate_use_zero_copy_send() (false) +#endif +int migrate_use_tls(void); int migrate_use_xbzrle(void); uint64_t migrate_xbzrle_cache_size(void); bool migrate_colo_enabled(void); diff --git a/migration/multifd.c b/migration/multifd.c index 9ea4f581e2..9282ab6aa4 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -566,19 +566,34 @@ void multifd_save_cleanup(void) multifd_send_state = NULL; } -void multifd_send_sync_main(QEMUFile *f) +int multifd_send_sync_main(QEMUFile *f) { int i; + bool flush_zero_copy; if (!migrate_use_multifd()) { - return; + return 0; } if (multifd_send_state->pages->num) { if (multifd_send_pages(f) < 0) { error_report("%s: multifd_send_pages fail", __func__); - return; + return -1; } } + + /* + * When using zero-copy, it's necessary to flush the pages before any of + * the pages can be sent again, so we'll make sure the new version of the + * pages will always arrive _later_ than the old pages. + * + * Currently we achieve this by flushing the zero-page requested writes + * per ram iteration, but in the future we could potentially optimize it + * to be less frequent, e.g. only after we finished one whole scanning of + * all the dirty bitmaps. + */ + + flush_zero_copy = migrate_use_zero_copy_send(); + for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -589,7 +604,7 @@ void multifd_send_sync_main(QEMUFile *f) if (p->quit) { error_report("%s: channel %d has already quit", __func__, i); qemu_mutex_unlock(&p->mutex); - return; + return -1; } p->packet_num = multifd_send_state->packet_num++; @@ -600,6 +615,17 @@ void multifd_send_sync_main(QEMUFile *f) ram_counters.transferred += p->packet_len; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); + + if (flush_zero_copy && p->c) { + int ret; + Error *err = NULL; + + ret = qio_channel_flush(p->c, &err); + if (ret < 0) { + error_report_err(err); + return -1; + } + } } for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -608,6 +634,8 @@ void multifd_send_sync_main(QEMUFile *f) qemu_sem_wait(&p->sem_sync); } trace_multifd_send_sync_main(multifd_send_state->packet_num); + + return 0; } static void *multifd_send_thread(void *opaque) @@ -615,6 +643,7 @@ static void *multifd_send_thread(void *opaque) MultiFDSendParams *p = opaque; Error *local_err = NULL; int ret = 0; + bool use_zero_copy_send = migrate_use_zero_copy_send(); trace_multifd_send_thread_start(p->id); rcu_register_thread(); @@ -637,9 +666,14 @@ static void *multifd_send_thread(void *opaque) if (p->pending_job) { uint64_t packet_num = p->packet_num; uint32_t flags = p->flags; - p->iovs_num = 1; p->normal_num = 0; + if (use_zero_copy_send) { + p->iovs_num = 0; + } else { + p->iovs_num = 1; + } + for (int i = 0; i < p->pages->num; i++) { p->normal[p->normal_num] = p->pages->offset[i]; p->normal_num++; @@ -663,11 +697,21 @@ static void *multifd_send_thread(void *opaque) trace_multifd_send(p->id, packet_num, p->normal_num, flags, p->next_packet_size); - p->iov[0].iov_len = p->packet_len; - p->iov[0].iov_base = p->packet; + if (use_zero_copy_send) { + /* Send header first, without zerocopy */ + ret = qio_channel_write_all(p->c, (void *)p->packet, + p->packet_len, &local_err); + if (ret != 0) { + break; + } + } else { + /* Send header using the same writev call */ + p->iov[0].iov_len = p->packet_len; + p->iov[0].iov_base = p->packet; + } - ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, - &local_err); + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, + 0, p->write_flags, &local_err); if (ret != 0) { break; } @@ -782,15 +826,12 @@ static bool multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc, Error *error) { - MigrationState *s = migrate_get_current(); - trace_multifd_set_outgoing_channel( ioc, object_get_typename(OBJECT(ioc)), migrate_get_current()->hostname, error); if (!error) { - if (s->parameters.tls_creds && - *s->parameters.tls_creds && + if (migrate_use_tls() && !object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { multifd_tls_channel_connect(p, ioc, &error); @@ -898,6 +939,13 @@ int multifd_save_setup(Error **errp) /* We need one extra place for the packet header */ p->iov = g_new0(struct iovec, page_count + 1); p->normal = g_new0(ram_addr_t, page_count); + + if (migrate_use_zero_copy_send()) { + p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; + } else { + p->write_flags = 0; + } + socket_send_channel_create(multifd_new_send_channel_async, p); } diff --git a/migration/multifd.h b/migration/multifd.h index 7d0effcb03..4d8d89e5e5 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -20,7 +20,7 @@ int multifd_load_cleanup(Error **errp); bool multifd_recv_all_channels_created(void); bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); void multifd_recv_sync_main(void); -void multifd_send_sync_main(QEMUFile *f); +int multifd_send_sync_main(QEMUFile *f); int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); /* Multifd Compression flags */ @@ -92,6 +92,8 @@ typedef struct { uint32_t packet_len; /* pointer to the packet */ MultiFDPacket_t *packet; + /* multifd flags for sending ram */ + int write_flags; /* multifd flags for each packet */ uint32_t flags; /* size of the next packet that contains pages */ diff --git a/migration/ram.c b/migration/ram.c index a2489a2699..5f5e37f64d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2909,6 +2909,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) { RAMState **rsp = opaque; RAMBlock *block; + int ret; if (compress_threads_save_setup()) { return -1; @@ -2943,7 +2944,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); - multifd_send_sync_main(f); + ret = multifd_send_sync_main(f); + if (ret < 0) { + return ret; + } + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); @@ -3052,7 +3057,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) out: if (ret >= 0 && migration_is_setup_or_active(migrate_get_current()->state)) { - multifd_send_sync_main(rs->f); + ret = multifd_send_sync_main(rs->f); + if (ret < 0) { + return ret; + } + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); ram_transferred_add(8); @@ -3112,13 +3121,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_FINISH); } - if (ret >= 0) { - multifd_send_sync_main(rs->f); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); + if (ret < 0) { + return ret; } - return ret; + ret = multifd_send_sync_main(rs->f); + if (ret < 0) { + return ret; + } + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + + return 0; } static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, diff --git a/migration/rdma.c b/migration/rdma.c index ef1e65ec36..672d1958a9 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, + int flags, Error **errp) { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); diff --git a/migration/socket.c b/migration/socket.c index 05705a32d8..4fd5e85f50 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -74,9 +74,17 @@ static void socket_outgoing_migration(QIOTask *task, if (qio_task_propagate_error(task, &err)) { trace_migration_socket_outgoing_error(error_get_pretty(err)); - } else { - trace_migration_socket_outgoing_connected(data->hostname); + goto out; } + + trace_migration_socket_outgoing_connected(data->hostname); + + if (migrate_use_zero_copy_send() && + !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg(&err, "Zero copy send feature not detected in host kernel"); + } + +out: migration_channel_connect(data->s, sioc, data->hostname, err); object_unref(OBJECT(sioc)); } diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 93061a11af..622c783c32 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_multifd_zstd_level = true; visit_type_uint8(v, param, &p->multifd_zstd_level, &err); break; +#ifdef CONFIG_LINUX + case MIGRATION_PARAMETER_ZERO_COPY_SEND: + p->has_zero_copy_send = true; + visit_type_bool(v, param, &p->zero_copy_send, &err); + break; +#endif case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: p->has_xbzrle_cache_size = true; if (!visit_type_size(v, param, &cache_size, &err)) { diff --git a/nbd/server.c b/nbd/server.c index 4cdbc062c1..213e00e761 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 Red Hat, Inc. + * Copyright (C) 2016-2022 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device Server Side @@ -1642,7 +1642,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, int64_t size; uint64_t perm, shared_perm; bool readonly = !exp_args->writable; - bool shared = !exp_args->writable; BlockDirtyBitmapOrStrList *bitmaps; size_t i; int ret; @@ -1693,11 +1692,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, exp->description = g_strdup(arg->description); exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE); + + if (nbd_server_max_connections() != 1) { + exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; + } if (readonly) { exp->nbdflags |= NBD_FLAG_READ_ONLY; - if (shared) { - exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; - } } else { exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES | NBD_FLAG_SEND_FAST_ZERO); diff --git a/net/clients.h b/net/clients.h index 92f9b59aed..c9157789f2 100644 --- a/net/clients.h +++ b/net/clients.h @@ -63,4 +63,15 @@ int net_init_vhost_user(const Netdev *netdev, const char *name, int net_init_vhost_vdpa(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); +#ifdef CONFIG_VMNET +int net_init_vmnet_host(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); + +int net_init_vmnet_shared(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); + +int net_init_vmnet_bridged(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); +#endif /* CONFIG_VMNET */ + #endif /* QEMU_NET_CLIENTS_H */ diff --git a/net/meson.build b/net/meson.build index c965e83b26..754e2d1d40 100644 --- a/net/meson.build +++ b/net/meson.build @@ -44,4 +44,11 @@ if have_vhost_net_vdpa softmmu_ss.add(files('vhost-vdpa.c')) endif +vmnet_files = files( + 'vmnet-common.m', + 'vmnet-bridged.m', + 'vmnet-host.c', + 'vmnet-shared.c' +) +softmmu_ss.add(when: vmnet, if_true: vmnet_files) subdir('can') @@ -1020,6 +1020,11 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( #ifdef CONFIG_L2TPV3 [NET_CLIENT_DRIVER_L2TPV3] = net_init_l2tpv3, #endif +#ifdef CONFIG_VMNET + [NET_CLIENT_DRIVER_VMNET_HOST] = net_init_vmnet_host, + [NET_CLIENT_DRIVER_VMNET_SHARED] = net_init_vmnet_shared, + [NET_CLIENT_DRIVER_VMNET_BRIDGED] = net_init_vmnet_bridged, +#endif /* CONFIG_VMNET */ }; @@ -1106,6 +1111,11 @@ void show_netdevs(void) #ifdef CONFIG_VHOST_VDPA "vhost-vdpa", #endif +#ifdef CONFIG_VMNET + "vmnet-host", + "vmnet-shared", + "vmnet-bridged", +#endif }; qemu_printf("Available netdev backend types:\n"); diff --git a/net/slirp.c b/net/slirp.c index bc5e9e4f77..8679be6444 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -184,23 +184,66 @@ static int64_t net_slirp_clock_get_ns(void *opaque) return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); } +typedef struct SlirpTimer SlirpTimer; +struct SlirpTimer { + QEMUTimer timer; +#if SLIRP_CHECK_VERSION(4,7,0) + Slirp *slirp; + SlirpTimerId id; + void *cb_opaque; +#endif +}; + +#if SLIRP_CHECK_VERSION(4,7,0) +static void net_slirp_init_completed(Slirp *slirp, void *opaque) +{ + SlirpState *s = opaque; + s->slirp = slirp; +} + +static void net_slirp_timer_cb(void *opaque) +{ + SlirpTimer *t = opaque; + slirp_handle_timer(t->slirp, t->id, t->cb_opaque); +} + +static void *net_slirp_timer_new_opaque(SlirpTimerId id, + void *cb_opaque, void *opaque) +{ + SlirpState *s = opaque; + SlirpTimer *t = g_new(SlirpTimer, 1); + t->slirp = s->slirp; + t->id = id; + t->cb_opaque = cb_opaque; + timer_init_full(&t->timer, NULL, QEMU_CLOCK_VIRTUAL, + SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, + net_slirp_timer_cb, t); + return t; +} +#else static void *net_slirp_timer_new(SlirpTimerCb cb, void *cb_opaque, void *opaque) { - return timer_new_full(NULL, QEMU_CLOCK_VIRTUAL, - SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, - cb, cb_opaque); + SlirpTimer *t = g_new(SlirpTimer, 1); + timer_init_full(&t->timer, NULL, QEMU_CLOCK_VIRTUAL, + SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, + cb, cb_opaque); + return t; } +#endif static void net_slirp_timer_free(void *timer, void *opaque) { - timer_free(timer); + SlirpTimer *t = timer; + timer_del(&t->timer); + g_free(t); } static void net_slirp_timer_mod(void *timer, int64_t expire_timer, void *opaque) { - timer_mod(timer, expire_timer); + SlirpTimer *t = timer; + timer_mod(&t->timer, expire_timer); } static void net_slirp_register_poll_fd(int fd, void *opaque) @@ -222,7 +265,12 @@ static const SlirpCb slirp_cb = { .send_packet = net_slirp_send_packet, .guest_error = net_slirp_guest_error, .clock_get_ns = net_slirp_clock_get_ns, +#if SLIRP_CHECK_VERSION(4,7,0) + .init_completed = net_slirp_init_completed, + .timer_new_opaque = net_slirp_timer_new_opaque, +#else .timer_new = net_slirp_timer_new, +#endif .timer_free = net_slirp_timer_free, .timer_mod = net_slirp_timer_mod, .register_poll_fd = net_slirp_register_poll_fd, @@ -380,6 +428,7 @@ static int net_slirp_init(NetClientState *peer, const char *model, #if defined(CONFIG_SMBD_COMMAND) struct in_addr smbsrv = { .s_addr = 0 }; #endif + SlirpConfig cfg = { 0 }; NetClientState *nc; SlirpState *s; char buf[20]; @@ -568,12 +617,26 @@ static int net_slirp_init(NetClientState *peer, const char *model, s = DO_UPCAST(SlirpState, nc, nc); - s->slirp = slirp_init(restricted, ipv4, net, mask, host, - ipv6, ip6_prefix, vprefix6_len, ip6_host, - vhostname, tftp_server_name, - tftp_export, bootfile, dhcp, - dns, ip6_dns, dnssearch, vdomainname, - &slirp_cb, s); + cfg.version = SLIRP_CHECK_VERSION(4,7,0) ? 4 : 1; + cfg.restricted = restricted; + cfg.in_enabled = ipv4; + cfg.vnetwork = net; + cfg.vnetmask = mask; + cfg.vhost = host; + cfg.in6_enabled = ipv6; + cfg.vprefix_addr6 = ip6_prefix; + cfg.vprefix_len = vprefix6_len; + cfg.vhost6 = ip6_host; + cfg.vhostname = vhostname; + cfg.tftp_server_name = tftp_server_name; + cfg.tftp_path = tftp_export; + cfg.bootfile = bootfile; + cfg.vdhcp_start = dhcp; + cfg.vnameserver = dns; + cfg.vnameserver6 = ip6_dns; + cfg.vdnssearch = dnssearch; + cfg.vdomainname = vdomainname; + s->slirp = slirp_new(&cfg, &slirp_cb, s); QTAILQ_INSERT_TAIL(&slirp_stacks, s, entry); /* diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 1e9fe47c03..df1e69ee72 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, err: if (i) { - qemu_del_net_client(ncs[0]); + for (i--; i >= 0; i--) { + qemu_del_net_client(ncs[i]); + } } qemu_close(vdpa_device_fd); diff --git a/net/vmnet-bridged.m b/net/vmnet-bridged.m new file mode 100644 index 0000000000..46d2282863 --- /dev/null +++ b/net/vmnet-bridged.m @@ -0,0 +1,152 @@ +/* + * vmnet-bridged.m + * + * Copyright(c) 2022 Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/qapi-types-net.h" +#include "qapi/error.h" +#include "clients.h" +#include "vmnet_int.h" + +#include <vmnet/vmnet.h> + + +static bool validate_ifname(const char *ifname) +{ + xpc_object_t shared_if_list = vmnet_copy_shared_interface_list(); + bool match = false; + if (!xpc_array_get_count(shared_if_list)) { + goto done; + } + + match = !xpc_array_apply( + shared_if_list, + ^bool(size_t index, xpc_object_t value) { + return strcmp(xpc_string_get_string_ptr(value), ifname) != 0; + }); + +done: + xpc_release(shared_if_list); + return match; +} + + +static char* get_valid_ifnames() +{ + xpc_object_t shared_if_list = vmnet_copy_shared_interface_list(); + __block char *if_list = NULL; + __block char *if_list_prev = NULL; + + if (!xpc_array_get_count(shared_if_list)) { + goto done; + } + + xpc_array_apply( + shared_if_list, + ^bool(size_t index, xpc_object_t value) { + /* build list of strings like "en0 en1 en2 " */ + if_list = g_strconcat(xpc_string_get_string_ptr(value), + " ", + if_list_prev, + NULL); + g_free(if_list_prev); + if_list_prev = if_list; + return true; + }); + +done: + xpc_release(shared_if_list); + return if_list; +} + + +static bool validate_options(const Netdev *netdev, Error **errp) +{ + const NetdevVmnetBridgedOptions *options = &(netdev->u.vmnet_bridged); + char* if_list; + + if (!validate_ifname(options->ifname)) { + if_list = get_valid_ifnames(); + if (if_list) { + error_setg(errp, + "unsupported ifname '%s', expected one of [ %s]", + options->ifname, + if_list); + g_free(if_list); + } else { + error_setg(errp, + "unsupported ifname '%s', no supported " + "interfaces available", + options->ifname); + } + return false; + } + +#if !defined(MAC_OS_VERSION_11_0) || \ + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 + if (options->has_isolated) { + error_setg(errp, + "vmnet-bridged.isolated feature is " + "unavailable: outdated vmnet.framework API"); + return false; + } +#endif + return true; +} + + +static xpc_object_t build_if_desc(const Netdev *netdev) +{ + const NetdevVmnetBridgedOptions *options = &(netdev->u.vmnet_bridged); + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); + + xpc_dictionary_set_uint64(if_desc, + vmnet_operation_mode_key, + VMNET_BRIDGED_MODE + ); + + xpc_dictionary_set_string(if_desc, + vmnet_shared_interface_name_key, + options->ifname); + +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + xpc_dictionary_set_bool(if_desc, + vmnet_enable_isolation_key, + options->isolated); +#endif + return if_desc; +} + + +static NetClientInfo net_vmnet_bridged_info = { + .type = NET_CLIENT_DRIVER_VMNET_BRIDGED, + .size = sizeof(VmnetState), + .receive = vmnet_receive_common, + .cleanup = vmnet_cleanup_common, +}; + + +int net_init_vmnet_bridged(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + NetClientState *nc = qemu_new_net_client(&net_vmnet_bridged_info, + peer, "vmnet-bridged", name); + xpc_object_t if_desc; + int result = -1; + + if (!validate_options(netdev, errp)) { + return result; + } + + if_desc = build_if_desc(netdev); + result = vmnet_if_create(nc, if_desc, errp); + xpc_release(if_desc); + return result; +} diff --git a/net/vmnet-common.m b/net/vmnet-common.m new file mode 100644 index 0000000000..2cb60b9ddd --- /dev/null +++ b/net/vmnet-common.m @@ -0,0 +1,378 @@ +/* + * vmnet-common.m - network client wrapper for Apple vmnet.framework + * + * Copyright(c) 2022 Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> + * Copyright(c) 2021 Phillip Tennen <phillip@axleos.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qemu/log.h" +#include "qapi/qapi-types-net.h" +#include "vmnet_int.h" +#include "clients.h" +#include "qemu/error-report.h" +#include "qapi/error.h" + +#include <vmnet/vmnet.h> +#include <dispatch/dispatch.h> + + +static void vmnet_send_completed(NetClientState *nc, ssize_t len); + + +const char *vmnet_status_map_str(vmnet_return_t status) +{ + switch (status) { + case VMNET_SUCCESS: + return "success"; + case VMNET_FAILURE: + return "general failure (possibly not enough privileges)"; + case VMNET_MEM_FAILURE: + return "memory allocation failure"; + case VMNET_INVALID_ARGUMENT: + return "invalid argument specified"; + case VMNET_SETUP_INCOMPLETE: + return "interface setup is not complete"; + case VMNET_INVALID_ACCESS: + return "invalid access, permission denied"; + case VMNET_PACKET_TOO_BIG: + return "packet size is larger than MTU"; + case VMNET_BUFFER_EXHAUSTED: + return "buffers exhausted in kernel"; + case VMNET_TOO_MANY_PACKETS: + return "packet count exceeds limit"; +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + case VMNET_SHARING_SERVICE_BUSY: + return "conflict, sharing service is in use"; +#endif + default: + return "unknown vmnet error"; + } +} + + +/** + * Write packets from QEMU to vmnet interface. + * + * vmnet.framework supports iov, but writing more than + * one iov into vmnet interface fails with + * 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into + * one and passing it to vmnet works fine. That's the + * reason why receive_iov() left unimplemented. But it still + * works with good performance having .receive() only. + */ +ssize_t vmnet_receive_common(NetClientState *nc, + const uint8_t *buf, + size_t size) +{ + VmnetState *s = DO_UPCAST(VmnetState, nc, nc); + struct vmpktdesc packet; + struct iovec iov; + int pkt_cnt; + vmnet_return_t if_status; + + if (size > s->max_packet_size) { + warn_report("vmnet: packet is too big, %zu > %" PRIu64, + packet.vm_pkt_size, + s->max_packet_size); + return -1; + } + + iov.iov_base = (char *) buf; + iov.iov_len = size; + + packet.vm_pkt_iovcnt = 1; + packet.vm_flags = 0; + packet.vm_pkt_size = size; + packet.vm_pkt_iov = &iov; + pkt_cnt = 1; + + if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); + if (if_status != VMNET_SUCCESS) { + error_report("vmnet: write error: %s\n", + vmnet_status_map_str(if_status)); + return -1; + } + + if (pkt_cnt) { + return size; + } + return 0; +} + + +/** + * Read packets from vmnet interface and write them + * to temporary buffers in VmnetState. + * + * Returns read packets number (may be 0) on success, + * -1 on error + */ +static int vmnet_read_packets(VmnetState *s) +{ + assert(s->packets_send_current_pos == s->packets_send_end_pos); + + struct vmpktdesc *packets = s->packets_buf; + vmnet_return_t status; + int i; + + /* Read as many packets as present */ + s->packets_send_current_pos = 0; + s->packets_send_end_pos = VMNET_PACKETS_LIMIT; + for (i = 0; i < s->packets_send_end_pos; ++i) { + packets[i].vm_pkt_size = s->max_packet_size; + packets[i].vm_pkt_iovcnt = 1; + packets[i].vm_flags = 0; + } + + status = vmnet_read(s->vmnet_if, packets, &s->packets_send_end_pos); + if (status != VMNET_SUCCESS) { + error_printf("vmnet: read failed: %s\n", + vmnet_status_map_str(status)); + s->packets_send_current_pos = 0; + s->packets_send_end_pos = 0; + return -1; + } + return s->packets_send_end_pos; +} + + +/** + * Write packets from temporary buffers in VmnetState + * to QEMU. + */ +static void vmnet_write_packets_to_qemu(VmnetState *s) +{ + while (s->packets_send_current_pos < s->packets_send_end_pos) { + ssize_t size = qemu_send_packet_async(&s->nc, + s->iov_buf[s->packets_send_current_pos].iov_base, + s->packets_buf[s->packets_send_current_pos].vm_pkt_size, + vmnet_send_completed); + + if (size == 0) { + /* QEMU is not ready to consume more packets - + * stop and wait for completion callback call */ + return; + } + ++s->packets_send_current_pos; + } +} + + +/** + * Bottom half callback that transfers packets from vmnet interface + * to QEMU. + * + * The process of transferring packets is three-staged: + * 1. Handle vmnet event; + * 2. Read packets from vmnet interface into temporary buffer; + * 3. Write packets from temporary buffer to QEMU. + * + * QEMU may suspend this process on the last stage, returning 0 from + * qemu_send_packet_async function. If this happens, we should + * respectfully wait until it is ready to consume more packets, + * write left ones in temporary buffer and only after this + * continue reading more packets from vmnet interface. + * + * Packets to be transferred are stored into packets_buf, + * in the window [packets_send_current_pos..packets_send_end_pos) + * including current_pos, excluding end_pos. + * + * Thus, if QEMU is not ready, buffer is not read and + * packets_send_current_pos < packets_send_end_pos. + */ +static void vmnet_send_bh(void *opaque) +{ + NetClientState *nc = (NetClientState *) opaque; + VmnetState *s = DO_UPCAST(VmnetState, nc, nc); + + /* + * Do nothing if QEMU is not ready - wait + * for completion callback invocation + */ + if (s->packets_send_current_pos < s->packets_send_end_pos) { + return; + } + + /* Read packets from vmnet interface */ + if (vmnet_read_packets(s) > 0) { + /* Send them to QEMU */ + vmnet_write_packets_to_qemu(s); + } +} + + +/** + * Completion callback to be invoked by QEMU when it becomes + * ready to consume more packets. + */ +static void vmnet_send_completed(NetClientState *nc, ssize_t len) +{ + VmnetState *s = DO_UPCAST(VmnetState, nc, nc); + + /* Callback is invoked eq queued packet is sent */ + ++s->packets_send_current_pos; + + /* Complete sending packets left in VmnetState buffers */ + vmnet_write_packets_to_qemu(s); + + /* And read new ones from vmnet if VmnetState buffer is ready */ + if (s->packets_send_current_pos < s->packets_send_end_pos) { + qemu_bh_schedule(s->send_bh); + } +} + + +static void vmnet_bufs_init(VmnetState *s) +{ + struct vmpktdesc *packets = s->packets_buf; + struct iovec *iov = s->iov_buf; + int i; + + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { + iov[i].iov_len = s->max_packet_size; + iov[i].iov_base = g_malloc0(iov[i].iov_len); + packets[i].vm_pkt_iov = iov + i; + } +} + + +int vmnet_if_create(NetClientState *nc, + xpc_object_t if_desc, + Error **errp) +{ + VmnetState *s = DO_UPCAST(VmnetState, nc, nc); + dispatch_semaphore_t if_created_sem = dispatch_semaphore_create(0); + __block vmnet_return_t if_status; + + s->if_queue = dispatch_queue_create( + "org.qemu.vmnet.if_queue", + DISPATCH_QUEUE_SERIAL + ); + + xpc_dictionary_set_bool( + if_desc, + vmnet_allocate_mac_address_key, + false + ); + +#ifdef DEBUG + qemu_log("vmnet.start.interface_desc:\n"); + xpc_dictionary_apply(if_desc, + ^bool(const char *k, xpc_object_t v) { + char *desc = xpc_copy_description(v); + qemu_log(" %s=%s\n", k, desc); + free(desc); + return true; + }); +#endif /* DEBUG */ + + s->vmnet_if = vmnet_start_interface( + if_desc, + s->if_queue, + ^(vmnet_return_t status, xpc_object_t interface_param) { + if_status = status; + if (status != VMNET_SUCCESS || !interface_param) { + dispatch_semaphore_signal(if_created_sem); + return; + } + +#ifdef DEBUG + qemu_log("vmnet.start.interface_param:\n"); + xpc_dictionary_apply(interface_param, + ^bool(const char *k, xpc_object_t v) { + char *desc = xpc_copy_description(v); + qemu_log(" %s=%s\n", k, desc); + free(desc); + return true; + }); +#endif /* DEBUG */ + + s->mtu = xpc_dictionary_get_uint64( + interface_param, + vmnet_mtu_key); + s->max_packet_size = xpc_dictionary_get_uint64( + interface_param, + vmnet_max_packet_size_key); + + dispatch_semaphore_signal(if_created_sem); + }); + + if (s->vmnet_if == NULL) { + dispatch_release(s->if_queue); + dispatch_release(if_created_sem); + error_setg(errp, + "unable to create interface with requested params"); + return -1; + } + + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); + dispatch_release(if_created_sem); + + if (if_status != VMNET_SUCCESS) { + dispatch_release(s->if_queue); + error_setg(errp, + "cannot create vmnet interface: %s", + vmnet_status_map_str(if_status)); + return -1; + } + + s->send_bh = aio_bh_new(qemu_get_aio_context(), vmnet_send_bh, nc); + vmnet_bufs_init(s); + + s->packets_send_current_pos = 0; + s->packets_send_end_pos = 0; + + vmnet_interface_set_event_callback( + s->vmnet_if, + VMNET_INTERFACE_PACKETS_AVAILABLE, + s->if_queue, + ^(interface_event_t event_id, xpc_object_t event) { + assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE); + /* + * This function is being called from a non qemu thread, so + * we only schedule a BH, and do the rest of the io completion + * handling from vmnet_send_bh() which runs in a qemu context. + */ + qemu_bh_schedule(s->send_bh); + }); + + return 0; +} + + +void vmnet_cleanup_common(NetClientState *nc) +{ + VmnetState *s = DO_UPCAST(VmnetState, nc, nc); + dispatch_semaphore_t if_stopped_sem; + + if (s->vmnet_if == NULL) { + return; + } + + if_stopped_sem = dispatch_semaphore_create(0); + vmnet_stop_interface( + s->vmnet_if, + s->if_queue, + ^(vmnet_return_t status) { + assert(status == VMNET_SUCCESS); + dispatch_semaphore_signal(if_stopped_sem); + }); + dispatch_semaphore_wait(if_stopped_sem, DISPATCH_TIME_FOREVER); + + qemu_purge_queued_packets(nc); + + qemu_bh_delete(s->send_bh); + dispatch_release(if_stopped_sem); + dispatch_release(s->if_queue); + + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { + g_free(s->iov_buf[i].iov_base); + } +} diff --git a/net/vmnet-host.c b/net/vmnet-host.c new file mode 100644 index 0000000000..05f8d78864 --- /dev/null +++ b/net/vmnet-host.c @@ -0,0 +1,128 @@ +/* + * vmnet-host.c + * + * Copyright(c) 2022 Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/uuid.h" +#include "qapi/qapi-types-net.h" +#include "qapi/error.h" +#include "clients.h" +#include "vmnet_int.h" + +#include <vmnet/vmnet.h> + + +static bool validate_options(const Netdev *netdev, Error **errp) +{ + const NetdevVmnetHostOptions *options = &(netdev->u.vmnet_host); + +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + + QemuUUID net_uuid; + if (options->has_net_uuid && + qemu_uuid_parse(options->net_uuid, &net_uuid) < 0) { + error_setg(errp, "Invalid UUID provided in 'net-uuid'"); + return false; + } +#else + if (options->has_isolated) { + error_setg(errp, + "vmnet-host.isolated feature is " + "unavailable: outdated vmnet.framework API"); + return false; + } + + if (options->has_net_uuid) { + error_setg(errp, + "vmnet-host.net-uuid feature is " + "unavailable: outdated vmnet.framework API"); + return false; + } +#endif + + if ((options->has_start_address || + options->has_end_address || + options->has_subnet_mask) && + !(options->has_start_address && + options->has_end_address && + options->has_subnet_mask)) { + error_setg(errp, + "'start-address', 'end-address', 'subnet-mask' " + "should be provided together"); + return false; + } + + return true; +} + +static xpc_object_t build_if_desc(const Netdev *netdev) +{ + const NetdevVmnetHostOptions *options = &(netdev->u.vmnet_host); + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); + + xpc_dictionary_set_uint64(if_desc, + vmnet_operation_mode_key, + VMNET_HOST_MODE); + +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + + xpc_dictionary_set_bool(if_desc, + vmnet_enable_isolation_key, + options->isolated); + + QemuUUID net_uuid; + if (options->has_net_uuid) { + qemu_uuid_parse(options->net_uuid, &net_uuid); + xpc_dictionary_set_uuid(if_desc, + vmnet_network_identifier_key, + net_uuid.data); + } +#endif + + if (options->has_start_address) { + xpc_dictionary_set_string(if_desc, + vmnet_start_address_key, + options->start_address); + xpc_dictionary_set_string(if_desc, + vmnet_end_address_key, + options->end_address); + xpc_dictionary_set_string(if_desc, + vmnet_subnet_mask_key, + options->subnet_mask); + } + + return if_desc; +} + +static NetClientInfo net_vmnet_host_info = { + .type = NET_CLIENT_DRIVER_VMNET_HOST, + .size = sizeof(VmnetState), + .receive = vmnet_receive_common, + .cleanup = vmnet_cleanup_common, +}; + +int net_init_vmnet_host(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + NetClientState *nc = qemu_new_net_client(&net_vmnet_host_info, + peer, "vmnet-host", name); + xpc_object_t if_desc; + int result = -1; + + if (!validate_options(netdev, errp)) { + return result; + } + + if_desc = build_if_desc(netdev); + result = vmnet_if_create(nc, if_desc, errp); + xpc_release(if_desc); + return result; +} diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c new file mode 100644 index 0000000000..18cadc72bd --- /dev/null +++ b/net/vmnet-shared.c @@ -0,0 +1,114 @@ +/* + * vmnet-shared.c + * + * Copyright(c) 2022 Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/qapi-types-net.h" +#include "qapi/error.h" +#include "vmnet_int.h" +#include "clients.h" + +#include <vmnet/vmnet.h> + + +static bool validate_options(const Netdev *netdev, Error **errp) +{ + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); + +#if !defined(MAC_OS_VERSION_11_0) || \ + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 + if (options->has_isolated) { + error_setg(errp, + "vmnet-shared.isolated feature is " + "unavailable: outdated vmnet.framework API"); + return false; + } +#endif + + if ((options->has_start_address || + options->has_end_address || + options->has_subnet_mask) && + !(options->has_start_address && + options->has_end_address && + options->has_subnet_mask)) { + error_setg(errp, + "'start-address', 'end-address', 'subnet-mask' " + "should be provided together" + ); + return false; + } + + return true; +} + +static xpc_object_t build_if_desc(const Netdev *netdev) +{ + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); + + xpc_dictionary_set_uint64( + if_desc, + vmnet_operation_mode_key, + VMNET_SHARED_MODE + ); + + if (options->has_nat66_prefix) { + xpc_dictionary_set_string(if_desc, + vmnet_nat66_prefix_key, + options->nat66_prefix); + } + + if (options->has_start_address) { + xpc_dictionary_set_string(if_desc, + vmnet_start_address_key, + options->start_address); + xpc_dictionary_set_string(if_desc, + vmnet_end_address_key, + options->end_address); + xpc_dictionary_set_string(if_desc, + vmnet_subnet_mask_key, + options->subnet_mask); + } + +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + xpc_dictionary_set_bool( + if_desc, + vmnet_enable_isolation_key, + options->isolated + ); +#endif + + return if_desc; +} + +static NetClientInfo net_vmnet_shared_info = { + .type = NET_CLIENT_DRIVER_VMNET_SHARED, + .size = sizeof(VmnetState), + .receive = vmnet_receive_common, + .cleanup = vmnet_cleanup_common, +}; + +int net_init_vmnet_shared(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info, + peer, "vmnet-shared", name); + xpc_object_t if_desc; + int result = -1; + + if (!validate_options(netdev, errp)) { + return result; + } + + if_desc = build_if_desc(netdev); + result = vmnet_if_create(nc, if_desc, errp); + xpc_release(if_desc); + return result; +} diff --git a/net/vmnet_int.h b/net/vmnet_int.h new file mode 100644 index 0000000000..adf6e8c20d --- /dev/null +++ b/net/vmnet_int.h @@ -0,0 +1,63 @@ +/* + * vmnet_int.h + * + * Copyright(c) 2022 Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#ifndef VMNET_INT_H +#define VMNET_INT_H + +#include "qemu/osdep.h" +#include "vmnet_int.h" +#include "clients.h" + +#include <vmnet/vmnet.h> +#include <dispatch/dispatch.h> + +/** + * From vmnet.framework documentation + * + * Each read/write call allows up to 200 packets to be + * read or written for a maximum of 256KB. + * + * Each packet written should be a complete + * ethernet frame. + * + * https://developer.apple.com/documentation/vmnet + */ +#define VMNET_PACKETS_LIMIT 200 + +typedef struct VmnetState { + NetClientState nc; + interface_ref vmnet_if; + + uint64_t mtu; + uint64_t max_packet_size; + + dispatch_queue_t if_queue; + + QEMUBH *send_bh; + + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; + int packets_send_current_pos; + int packets_send_end_pos; + + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; +} VmnetState; + +const char *vmnet_status_map_str(vmnet_return_t status); + +int vmnet_if_create(NetClientState *nc, + xpc_object_t if_desc, + Error **errp); + +ssize_t vmnet_receive_common(NetClientState *nc, + const uint8_t *buf, + size_t size); + +void vmnet_cleanup_common(NetClientState *nc); + +#endif /* VMNET_INT_H */ diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile index f1ef898073..2494ad9c25 100644 --- a/pc-bios/optionrom/Makefile +++ b/pc-bios/optionrom/Makefile @@ -22,9 +22,11 @@ override CFLAGS += $(call cc-option, -fcf-protection=none) override CPPFLAGS += -MMD -MP -MT $@ -MF $(@D)/$(*F).d override CFLAGS += $(filter -W%, $(QEMU_CFLAGS)) -override CFLAGS += $(CFLAGS_NOPIE) -ffreestanding -I$(TOPSRC_DIR)/include +override CFLAGS += $(call cc-option, -fno-pie) +override CFLAGS += -ffreestanding -I$(TOPSRC_DIR)/include override CFLAGS += $(call cc-option, -fno-stack-protector) override CFLAGS += $(call cc-option, -m16) +override CFLAGS += $(call cc-option, -Wno-array-bounds) ifeq ($(filter -m16, $(CFLAGS)),) # Attempt to work around compilers that lack -m16 (GCC <= 4.8, clang <= ??) diff --git a/plugins/plugin.h b/plugins/plugin.h index b13677d0dc..5eb2fdbc85 100644 --- a/plugins/plugin.h +++ b/plugins/plugin.h @@ -9,8 +9,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef PLUGIN_INTERNAL_H -#define PLUGIN_INTERNAL_H +#ifndef PLUGIN_H +#define PLUGIN_H #include <gmodule.h> #include "qemu/qht.h" @@ -97,4 +97,4 @@ void plugin_register_vcpu_mem_cb(GArray **arr, void exec_inline_op(struct qemu_plugin_dyn_cb *cb); -#endif /* _PLUGIN_INTERNAL_H_ */ +#endif /* PLUGIN_H */ diff --git a/qapi/audio.json b/qapi/audio.json index 0785e70a50..8099e3d7f1 100644 --- a/qapi/audio.json +++ b/qapi/audio.json @@ -352,7 +352,6 @@ '*out': 'AudiodevPerDirectionOptions', '*path': 'str' } } - ## # @AudioFormat: # diff --git a/qapi/block-core.json b/qapi/block-core.json index b66494e8c5..f0383c7925 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -237,7 +237,6 @@ # information (since 1.7) # # Since: 1.3 -# ## { 'struct': 'ImageInfo', 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', @@ -288,7 +287,6 @@ # supports it # # Since: 1.4 -# ## { 'struct': 'ImageCheck', 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', @@ -328,7 +326,6 @@ # @filename: filename that is referred to by @offset # # Since: 2.6 -# ## { 'struct': 'MapEntry', 'data': {'start': 'int', 'length': 'int', 'data': 'bool', @@ -340,9 +337,9 @@ # # Cache mode information for a block device # -# @writeback: true if writeback mode is enabled -# @direct: true if the host page cache is bypassed (O_DIRECT) -# @no-flush: true if flush requests are ignored for the device +# @writeback: true if writeback mode is enabled +# @direct: true if the host page cache is bypassed (O_DIRECT) +# @no-flush: true if flush requests are ignored for the device # # Since: 2.3 ## @@ -445,7 +442,6 @@ # has one or more dirty bitmaps) (Since 4.2) # # Since: 0.14 -# ## { 'struct': 'BlockDeviceInfo', 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', @@ -608,7 +604,7 @@ # @inserted: @BlockDeviceInfo describing the device if media is # present # -# Since: 0.14 +# Since: 0.14 ## { 'struct': 'BlockInfo', 'data': {'device': 'str', '*qdev': 'str', 'type': 'str', 'removable': 'bool', @@ -743,7 +739,6 @@ ## { 'command': 'query-block', 'returns': ['BlockInfo'] } - ## # @BlockDeviceTimedStats: # @@ -800,9 +795,9 @@ # # Statistics of a virtual block device or a block backing device. # -# @rd_bytes: The number of bytes read by the device. +# @rd_bytes: The number of bytes read by the device. # -# @wr_bytes: The number of bytes written by the device. +# @wr_bytes: The number of bytes written by the device. # # @unmap_bytes: The number of bytes unmapped by the device (Since 4.2) # @@ -975,7 +970,7 @@ # @qdev: The qdev ID, or if no ID is assigned, the QOM path of the block # device. (since 3.0) # -# @stats: A @BlockDeviceStats for the device. +# @stats: A @BlockDeviceStats for the device. # # @driver-specific: Optional driver-specific stats. (Since 4.2) # @@ -1280,7 +1275,7 @@ # # @node-name: graph node name to get the image resized (Since 2.0) # -# @size: new image size in bytes +# @size: new image size in bytes # # Returns: - nothing on success # - If @device is not a valid block device, DeviceNotFound @@ -1516,7 +1511,6 @@ { 'command': 'blockdev-snapshot-sync', 'data': 'BlockdevSnapshotSync' } - ## # @blockdev-snapshot: # @@ -1744,6 +1738,7 @@ # Since: 2.3 # # Example: +# # -> { "execute": "blockdev-backup", # "arguments": { "device": "src-id", # "sync": "full", @@ -1754,7 +1749,6 @@ { 'command': 'blockdev-backup', 'boxed': true, 'data': 'BlockdevBackup' } - ## # @query-named-block-nodes: # @@ -1966,8 +1960,8 @@ # @job-id: identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the device name or node-name of a root node whose writes should be -# mirrored. +# @device: the device name or node-name of a root node whose writes should be +# mirrored. # # @target: the target of the new image. If the file exists, or if it # is a device, the existing file/device will be used as the new @@ -1987,7 +1981,7 @@ # @mode: whether and how QEMU should create a new image, default is # 'absolute-paths'. # -# @speed: the maximum speed, in bytes per second +# @speed: the maximum speed, in bytes per second # # @sync: what parts of the disk image should be copied to the destination # (all the disk, only the sectors allocated in the topmost image, or @@ -2008,6 +2002,7 @@ # @on-target-error: the action to take on an error on the target, # default 'report' (no limitations, since this applies to # a different block device than @device). +# # @unmap: Whether to try to unmap target sectors where source has # only zero. If true, and target unallocated sectors will read as zero, # target image sectors will be unmapped; otherwise, zeroes will be @@ -2029,6 +2024,7 @@ # When true, this job will automatically disappear from the query # list without user intervention. # Defaults to true. (Since 3.1) +# # Since: 1.3 ## { 'struct': 'DriveMirror', @@ -2300,7 +2296,7 @@ # broken Quorum files. By default, @device is replaced, although # implicitly created filters on it are kept. # -# @speed: the maximum speed, in bytes per second +# @speed: the maximum speed, in bytes per second # # @sync: what parts of the disk image should be copied to the destination # (all the disk, only the sectors allocated in the topmost image, or @@ -2342,6 +2338,7 @@ # When true, this job will automatically disappear from the query # list without user intervention. # Defaults to true. (Since 3.1) +# # Returns: nothing on success. # # Since: 2.6 @@ -3067,7 +3064,6 @@ 'base': 'BlockdevOptionsGenericFormat', 'data': { '*key-secret': 'str' } } - ## # @BlockdevOptionsGenericCOWFormat: # @@ -3182,8 +3178,6 @@ 'base': 'BlockdevOptionsGenericCOWFormat', 'data': { '*encrypt': 'BlockdevQcowEncryption' } } - - ## # @BlockdevQcow2EncryptionFormat: # @@ -3339,15 +3333,14 @@ ## # @BlockdevOptionsSsh: # -# @server: host address +# @server: host address # -# @path: path to the image on the host +# @path: path to the image on the host # -# @user: user as which to connect, defaults to current -# local user name +# @user: user as which to connect, defaults to current local user name # -# @host-key-check: Defines how and what to check the host key against -# (default: known_hosts) +# @host-key-check: Defines how and what to check the host key against +# (default: known_hosts) # # Since: 2.9 ## @@ -3357,7 +3350,6 @@ '*user': 'str', '*host-key-check': 'SshHostKeyCheck' } } - ## # @BlkdebugEvent: # @@ -3721,7 +3713,6 @@ '*header-digest': 'IscsiHeaderDigest', '*timeout': 'int' } } - ## # @RbdAuthMode: # @@ -4139,6 +4130,7 @@ # @throttle-group: the name of the throttle-group object to use. It # must already exist. # @file: reference to or definition of the data source block device +# # Since: 2.11 ## { 'struct': 'BlockdevOptionsThrottle', @@ -4555,15 +4547,14 @@ ## # @BlockdevQcow2Version: # -# @v2: The original QCOW2 format as introduced in qemu 0.10 (version 2) -# @v3: The extended QCOW2 format as introduced in qemu 1.1 (version 3) +# @v2: The original QCOW2 format as introduced in qemu 0.10 (version 2) +# @v3: The extended QCOW2 format as introduced in qemu 1.1 (version 3) # # Since: 2.12 ## { 'enum': 'BlockdevQcow2Version', 'data': [ 'v2', 'v3' ] } - ## # @Qcow2CompressionType: # @@ -4670,18 +4661,18 @@ # # Subformat options for VMDK images # -# @monolithicSparse: Single file image with sparse cluster allocation +# @monolithicSparse: Single file image with sparse cluster allocation # -# @monolithicFlat: Single flat data image and a descriptor file +# @monolithicFlat: Single flat data image and a descriptor file # # @twoGbMaxExtentSparse: Data is split into 2GB (per virtual LBA) sparse extent # files, in addition to a descriptor file # -# @twoGbMaxExtentFlat: Data is split into 2GB (per virtual LBA) flat extent -# files, in addition to a descriptor file +# @twoGbMaxExtentFlat: Data is split into 2GB (per virtual LBA) flat extent +# files, in addition to a descriptor file # -# @streamOptimized: Single file image sparse cluster allocation, optimized -# for streaming over network. +# @streamOptimized: Single file image sparse cluster allocation, optimized +# for streaming over network. # # Since: 4.0 ## @@ -4737,7 +4728,6 @@ '*toolsversion': 'str', '*zeroed-grain': 'bool' } } - ## # @BlockdevCreateOptionsSsh: # @@ -4773,7 +4763,7 @@ # @BlockdevVhdxSubformat: # # @dynamic: Growing image file -# @fixed: Preallocated fixed-size image file +# @fixed: Preallocated fixed-size image file # # Since: 2.12 ## @@ -4811,7 +4801,7 @@ # @BlockdevVpcSubformat: # # @dynamic: Growing image file -# @fixed: Preallocated fixed-size image file +# @fixed: Preallocated fixed-size image file # # Since: 2.12 ## @@ -4874,9 +4864,9 @@ # Starts a job to create an image format on a given node. The job is # automatically finalized, but a manual job-dismiss is required. # -# @job-id: Identifier for the newly created job. +# @job-id: Identifier for the newly created job. # -# @options: Options for the image creation. +# @options: Options for the image creation. # # Since: 3.0 ## @@ -4914,7 +4904,7 @@ # # Options for amending an image format # -# @driver: Block driver of the node to amend. +# @driver: Block driver of the node to amend. # # Since: 5.1 ## @@ -4932,17 +4922,17 @@ # Starts a job to amend format specific options of an existing open block device # The job is automatically finalized, but a manual job-dismiss is required. # -# @job-id: Identifier for the newly created job. +# @job-id: Identifier for the newly created job. # -# @node-name: Name of the block node to work on +# @node-name: Name of the block node to work on # -# @options: Options (driver specific) +# @options: Options (driver specific) # -# @force: Allow unsafe operations, format specific -# For luks that allows erase of the last active keyslot -# (permanent loss of data), -# and replacement of an active keyslot -# (possible loss of data if IO error happens) +# @force: Allow unsafe operations, format specific +# For luks that allows erase of the last active keyslot +# (permanent loss of data), +# and replacement of an active keyslot +# (possible loss of data if IO error happens) # # Features: # @unstable: This command is experimental. @@ -4972,7 +4962,6 @@ { 'enum': 'BlockErrorAction', 'data': [ 'ignore', 'report', 'stop' ] } - ## # @BLOCK_IMAGE_CORRUPTED: # diff --git a/qapi/block-export.json b/qapi/block-export.json index 1de16d2589..0685cb8b9a 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -22,7 +22,9 @@ # recreated on the fly while the NBD server is active. # If missing, it will default to denying access (since 4.0). # @max-connections: The maximum number of connections to allow at the same -# time, 0 for unlimited. (since 5.2; default: 0) +# time, 0 for unlimited. Setting this to 1 also stops +# the server from advertising multiple client support +# (since 5.2; default: 0) # # Since: 4.2 ## @@ -51,7 +53,9 @@ # recreated on the fly while the NBD server is active. # If missing, it will default to denying access (since 4.0). # @max-connections: The maximum number of connections to allow at the same -# time, 0 for unlimited. (since 5.2; default: 0) +# time, 0 for unlimited. Setting this to 1 also stops +# the server from advertising multiple client support +# (since 5.2; default: 0). # # Returns: error if the server is already running. # @@ -387,7 +391,7 @@ # block-export-del command, but before the shutdown has # completed) # -# Since: 5.2 +# Since: 5.2 ## { 'struct': 'BlockExportInfo', 'data': { 'id': 'str', diff --git a/qapi/block.json b/qapi/block.json index 3f100d4887..19326641ac 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -50,9 +50,9 @@ # # Type of Floppy drive to be emulated by the Floppy Disk Controller. # -# @144: 1.44MB 3.5" drive -# @288: 2.88MB 3.5" drive -# @120: 1.2MB 5.25" drive +# @144: 1.44MB 3.5" drive +# @288: 2.88MB 3.5" drive +# @120: 1.2MB 5.25" drive # @none: No drive connected # @auto: Automatically determined by inserted media at boot # @@ -105,7 +105,8 @@ # # Returns: - Nothing on success # - If @device is not a valid block device, DeviceNotFound -# Notes: Ejecting a device with no media results in success +# +# Notes: Ejecting a device with no media results in success # # Since: 0.14 # @@ -285,7 +286,6 @@ 'data': { 'id': 'str', 'node-name': 'str'} } - ## # @BlockdevChangeReadOnlyMode: # @@ -299,12 +299,10 @@ # @read-write: Makes the device writable # # Since: 2.3 -# ## { 'enum': 'BlockdevChangeReadOnlyMode', 'data': ['retain', 'read-only', 'read-write'] } - ## # @blockdev-change-medium: # @@ -375,7 +373,6 @@ '*force': 'bool', '*read-only-mode': 'BlockdevChangeReadOnlyMode' } } - ## # @DEVICE_TRAY_MOVED: # diff --git a/qapi/char.json b/qapi/char.json index 7b42151575..923dc5056d 100644 --- a/qapi/char.json +++ b/qapi/char.json @@ -216,7 +216,7 @@ # # Configuration info for file chardevs. # -# @in: The name of the input file +# @in: The name of the input file # @out: The name of the output file # @append: Open the file in append mode (default false to # truncate) (Since 2.6) @@ -329,7 +329,6 @@ 'data': { '*signal': 'bool' }, 'base': 'ChardevCommon' } - ## # @ChardevSpiceChannel: # @@ -377,10 +376,10 @@ # # Configuration info for virtual console chardevs. # -# @width: console width, in pixels +# @width: console width, in pixels # @height: console height, in pixels -# @cols: console width, in chars -# @rows: console height, in chars +# @cols: console width, in chars +# @rows: console height, in chars # # Since: 1.5 ## @@ -413,7 +412,6 @@ # @clipboard: enable/disable clipboard, default is disabled. # # Since: 6.1 -# ## { 'struct': 'ChardevQemuVDAgent', 'data': { '*mouse': 'bool', diff --git a/qapi/common.json b/qapi/common.json index 412cc4f5ae..356db3f670 100644 --- a/qapi/common.json +++ b/qapi/common.json @@ -192,7 +192,6 @@ # Keys to toggle input-linux between host and guest. # # Since: 4.0 -# ## { 'enum': 'GrabToggleKeys', 'data': [ 'ctrl-ctrl', 'alt-alt', 'shift-shift','meta-meta', 'scrolllock', @@ -204,7 +203,6 @@ # @human-readable-text: Formatted output intended for humans. # # Since: 6.2 -# ## { 'struct': 'HumanReadableText', 'data': { 'human-readable-text': 'str' } } diff --git a/qapi/control.json b/qapi/control.json index 71a838d49e..afca2043af 100644 --- a/qapi/control.json +++ b/qapi/control.json @@ -33,7 +33,6 @@ # all the QMP capabilities will be turned off by default. # # Since: 0.13 -# ## { 'command': 'qmp_capabilities', 'data': { '*enable': [ 'QMPCapability' ] }, @@ -49,7 +48,6 @@ # (Please refer to qmp-spec.txt for more information on OOB) # # Since: 2.12 -# ## { 'enum': 'QMPCapability', 'data': [ 'oob' ] } @@ -70,7 +68,6 @@ { 'struct': 'VersionTriple', 'data': {'major': 'int', 'minor': 'int', 'micro': 'int'} } - ## # @VersionInfo: # @@ -195,14 +192,14 @@ # # Options to be used for adding a new monitor. # -# @id: Name of the monitor +# @id: Name of the monitor # -# @mode: Selects the monitor mode (default: readline in the system -# emulator, control in qemu-storage-daemon) +# @mode: Selects the monitor mode (default: readline in the system +# emulator, control in qemu-storage-daemon) # -# @pretty: Enables pretty printing (QMP only) +# @pretty: Enables pretty printing (QMP only) # -# @chardev: Name of a character device to expose the monitor on +# @chardev: Name of a character device to expose the monitor on # # Since: 5.0 ## diff --git a/qapi/crypto.json b/qapi/crypto.json index 1ec54c15ca..15c24f0078 100644 --- a/qapi/crypto.json +++ b/qapi/crypto.json @@ -24,7 +24,6 @@ 'prefix': 'QCRYPTO_TLS_CREDS_ENDPOINT', 'data': ['client', 'server']} - ## # @QCryptoSecretFormat: # @@ -32,13 +31,13 @@ # # @raw: raw bytes. When encoded in JSON only valid UTF-8 sequences can be used # @base64: arbitrary base64 encoded binary data +# # Since: 2.6 ## { 'enum': 'QCryptoSecretFormat', 'prefix': 'QCRYPTO_SECRET_FORMAT', 'data': ['raw', 'base64']} - ## # @QCryptoHashAlgorithm: # @@ -51,13 +50,13 @@ # @sha384: SHA-384. (since 2.7) # @sha512: SHA-512. (since 2.7) # @ripemd160: RIPEMD-160. (since 2.7) +# # Since: 2.6 ## { 'enum': 'QCryptoHashAlgorithm', 'prefix': 'QCRYPTO_HASH_ALG', 'data': ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', 'ripemd160']} - ## # @QCryptoCipherAlgorithm: # @@ -75,6 +74,7 @@ # @twofish-128: Twofish with 128 bit / 16 byte keys # @twofish-192: Twofish with 192 bit / 24 byte keys # @twofish-256: Twofish with 256 bit / 32 byte keys +# # Since: 2.6 ## { 'enum': 'QCryptoCipherAlgorithm', @@ -85,7 +85,6 @@ 'serpent-128', 'serpent-192', 'serpent-256', 'twofish-128', 'twofish-192', 'twofish-256']} - ## # @QCryptoCipherMode: # @@ -95,13 +94,13 @@ # @cbc: Cipher Block Chaining # @xts: XEX with tweaked code book and ciphertext stealing # @ctr: Counter (Since 2.8) +# # Since: 2.6 ## { 'enum': 'QCryptoCipherMode', 'prefix': 'QCRYPTO_CIPHER_MODE', 'data': ['ecb', 'cbc', 'xts', 'ctr']} - ## # @QCryptoIVGenAlgorithm: # @@ -114,6 +113,7 @@ # @plain: 64-bit sector number truncated to 32-bits # @plain64: 64-bit sector number # @essiv: 64-bit sector number encrypted with a hash of the encryption key +# # Since: 2.6 ## { 'enum': 'QCryptoIVGenAlgorithm', @@ -170,12 +170,12 @@ # @key-secret: the ID of a QCryptoSecret object providing the # decryption key. Mandatory except when probing image for # metadata only. +# # Since: 2.6 ## { 'struct': 'QCryptoBlockOptionsLUKS', 'data': { '*key-secret': 'str' }} - ## # @QCryptoBlockCreateOptionsLUKS: # @@ -194,6 +194,7 @@ # @iter-time: number of milliseconds to spend in # PBKDF passphrase processing. Currently defaults # to 2000. (since 2.8) +# # Since: 2.6 ## { 'struct': 'QCryptoBlockCreateOptionsLUKS', @@ -205,7 +206,6 @@ '*hash-alg': 'QCryptoHashAlgorithm', '*iter-time': 'int'}} - ## # @QCryptoBlockOpenOptions: # @@ -220,7 +220,6 @@ 'data': { 'qcow': 'QCryptoBlockOptionsQCow', 'luks': 'QCryptoBlockOptionsLUKS' } } - ## # @QCryptoBlockCreateOptions: # @@ -235,7 +234,6 @@ 'data': { 'qcow': 'QCryptoBlockOptionsQCow', 'luks': 'QCryptoBlockCreateOptionsLUKS' } } - ## # @QCryptoBlockInfoBase: # @@ -249,7 +247,6 @@ { 'struct': 'QCryptoBlockInfoBase', 'data': { 'format': 'QCryptoBlockFormat' }} - ## # @QCryptoBlockInfoLUKSSlot: # @@ -269,7 +266,6 @@ '*stripes': 'int', 'key-offset': 'int' } } - ## # @QCryptoBlockInfoLUKS: # @@ -315,15 +311,14 @@ # # Defines state of keyslots that are affected by the update # -# @active: The slots contain the given password and marked as active -# @inactive: The slots are erased (contain garbage) and marked as inactive +# @active: The slots contain the given password and marked as active +# @inactive: The slots are erased (contain garbage) and marked as inactive # # Since: 5.1 ## { 'enum': 'QCryptoBlockLUKSKeyslotState', 'data': [ 'active', 'inactive' ] } - ## # @QCryptoBlockAmendOptionsLUKS: # @@ -332,33 +327,32 @@ # # @state: the desired state of the keyslots # -# @new-secret: The ID of a QCryptoSecret object providing the password to be -# written into added active keyslots +# @new-secret: The ID of a QCryptoSecret object providing the password to be +# written into added active keyslots # -# @old-secret: Optional (for deactivation only) -# If given will deactivate all keyslots that -# match password located in QCryptoSecret with this ID +# @old-secret: Optional (for deactivation only) +# If given will deactivate all keyslots that +# match password located in QCryptoSecret with this ID # -# @iter-time: Optional (for activation only) -# Number of milliseconds to spend in -# PBKDF passphrase processing for the newly activated keyslot. -# Currently defaults to 2000. +# @iter-time: Optional (for activation only) +# Number of milliseconds to spend in +# PBKDF passphrase processing for the newly activated keyslot. +# Currently defaults to 2000. # -# @keyslot: Optional. ID of the keyslot to activate/deactivate. -# For keyslot activation, keyslot should not be active already -# (this is unsafe to update an active keyslot), -# but possible if 'force' parameter is given. -# If keyslot is not given, first free keyslot will be written. +# @keyslot: Optional. ID of the keyslot to activate/deactivate. +# For keyslot activation, keyslot should not be active already +# (this is unsafe to update an active keyslot), +# but possible if 'force' parameter is given. +# If keyslot is not given, first free keyslot will be written. # -# For keyslot deactivation, this parameter specifies the exact -# keyslot to deactivate +# For keyslot deactivation, this parameter specifies the exact +# keyslot to deactivate # -# @secret: Optional. The ID of a QCryptoSecret object providing the -# password to use to retrieve current master key. -# Defaults to the same secret that was used to open the image +# @secret: Optional. The ID of a QCryptoSecret object providing the +# password to use to retrieve current master key. +# Defaults to the same secret that was used to open the image # -# -# Since 5.1 +# Since: 5.1 ## { 'struct': 'QCryptoBlockAmendOptionsLUKS', 'data': { 'state': 'QCryptoBlockLUKSKeyslotState', diff --git a/qapi/dump.json b/qapi/dump.json index 29441af9d8..90859c5483 100644 --- a/qapi/dump.json +++ b/qapi/dump.json @@ -186,8 +186,8 @@ # # Returns the available formats for dump-guest-memory # -# Returns: A @DumpGuestMemoryCapability object listing available formats for -# dump-guest-memory +# Returns: A @DumpGuestMemoryCapability object listing available formats for +# dump-guest-memory # # Since: 2.0 # diff --git a/qapi/job.json b/qapi/job.json index 1a6ef03451..d5f84e9615 100644 --- a/qapi/job.json +++ b/qapi/job.json @@ -173,7 +173,6 @@ ## { 'command': 'job-cancel', 'data': { 'id': 'str' } } - ## # @job-complete: # diff --git a/qapi/machine-target.json b/qapi/machine-target.json index f5ec4bc172..06b0d2ca61 100644 --- a/qapi/machine-target.json +++ b/qapi/machine-target.json @@ -54,7 +54,6 @@ { 'enum': 'CpuModelExpansionType', 'data': [ 'static', 'full' ] } - ## # @CpuModelCompareResult: # diff --git a/qapi/machine.json b/qapi/machine.json index 4c417e32a5..883ce3f9ea 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -77,7 +77,6 @@ # additional fields will be listed (since 3.0) # # Since: 2.12 -# ## { 'union' : 'CpuInfoFast', 'base' : { 'cpu-index' : 'int', @@ -299,6 +298,7 @@ # returning does not indicate that a guest has accepted the request or # that it has shut down. Many guests will respond to this command by # prompting the user in some way. +# # Example: # # -> { "execute": "system_powerdown" } @@ -315,9 +315,9 @@ # query-current-machine), wake-up guest from suspend if the guest is # in SUSPENDED state. Return an error otherwise. # -# Since: 1.1 +# Since: 1.1 # -# Returns: nothing. +# Returns: nothing. # # Note: prior to 4.0, this command does nothing in case the guest # isn't suspended. @@ -368,9 +368,9 @@ # Injects a Non-Maskable Interrupt into the default CPU (x86/s390) or all CPUs (ppc64). # The command fails when the guest doesn't support injecting. # -# Returns: If successful, nothing +# Returns: If successful, nothing # -# Since: 0.14 +# Since: 0.14 # # Note: prior to 2.1, this command was only supported for x86 and s390 VMs # @@ -503,6 +503,27 @@ 'val': 'uint8' }} ## +# @CXLFixedMemoryWindowOptions: +# +# Create a CXL Fixed Memory Window +# +# @size: Size of the Fixed Memory Window in bytes. Must be a multiple +# of 256MiB. +# @interleave-granularity: Number of contiguous bytes for which +# accesses will go to a given interleave target. +# Accepted values [256, 512, 1k, 2k, 4k, 8k, 16k] +# @targets: Target root bridge IDs from -device ...,id=<ID> for each root +# bridge. +# +# Since 7.1 +## +{ 'struct': 'CXLFixedMemoryWindowOptions', + 'data': { + 'size': 'size', + '*interleave-granularity': 'size', + 'targets': ['str'] }} + +## # @X86CPURegister32: # # A X86 32-bit register @@ -972,7 +993,7 @@ # preconfigure stage to configure numa mapping before initializing # machine. # -# Since 3.0 +# Since: 3.0 ## { 'command': 'set-numa-node', 'boxed': true, 'data': 'NumaOptions', @@ -1021,7 +1042,6 @@ # Formula used: logical_vm_size = vm_ram_size - balloon_size # # Since: 0.14 -# ## { 'struct': 'BalloonInfo', 'data': {'actual': 'int' } } @@ -1366,7 +1386,6 @@ { 'event': 'MEMORY_DEVICE_SIZE_CHANGE', 'data': { '*id': 'str', 'size': 'size', 'qom-path' : 'str'} } - ## # @MEM_UNPLUG_ERROR: # @@ -1396,6 +1415,36 @@ 'features': ['deprecated'] } ## +# @BootConfiguration: +# +# Schema for virtual machine boot configuration. +# +# @order: Boot order (a=floppy, c=hard disk, d=CD-ROM, n=network) +# +# @once: Boot order to apply on first boot +# +# @menu: Whether to show a boot menu +# +# @splash: The name of the file to be passed to the firmware as logo picture, if @menu is true. +# +# @splash-time: How long to show the logo picture, in milliseconds +# +# @reboot-timeout: Timeout before guest reboots after boot fails +# +# @strict: Whether to attempt booting from devices not included in the boot order +# +# Since: 7.1 +## +{ 'struct': 'BootConfiguration', 'data': { + '*order': 'str', + '*once': 'str', + '*menu': 'bool', + '*splash': 'str', + '*splash-time': 'int', + '*reboot-timeout': 'int', + '*strict': 'bool' } } + +## # @SMPConfiguration: # # Schema for CPU topology configuration. A missing value lets @@ -1584,3 +1633,21 @@ ## { 'enum': 'SmbiosEntryPointType', 'data': [ '32', '64' ] } + +## +# @MemorySizeConfiguration: +# +# Schema for memory size configuration. +# +# @size: memory size in bytes +# +# @max-size: maximum hotpluggable memory size in bytes +# +# @slots: number of available memory slots for hotplug +# +# Since: 7.1 +## +{ 'struct': 'MemorySizeConfiguration', 'data': { + '*size': 'size', + '*max-size': 'size', + '*slots': 'uint64' } } diff --git a/qapi/migration.json b/qapi/migration.json index 409eb086a2..6130cd9fae 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -151,7 +151,6 @@ # (since 4.2) # # Since: 2.3 -# ## { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', @@ -166,7 +165,6 @@ # @transferred: amount of bytes transferred to the target VM by VFIO devices # # Since: 5.2 -# ## { 'struct': 'VfioStats', 'data': {'transferred': 'int' } } @@ -546,7 +544,6 @@ # @zstd: use zstd compression method. # # Since: 5.0 -# ## { 'enum': 'MultiFDCompression', 'data': [ 'none', 'zlib', @@ -741,6 +738,13 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # +# @zero-copy-send: Controls behavior on sending memory pages on migration. +# When true, enables a zero-copy mechanism for sending +# memory pages, if host supports it. +# Requires that QEMU be permitted to use locked memory +# for guest RAM pages. +# Defaults to false. (Since 7.1) +# # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such # aliases may for example be the corresponding names on the @@ -780,6 +784,7 @@ 'xbzrle-cache-size', 'max-postcopy-bandwidth', 'max-cpu-throttle', 'multifd-compression', 'multifd-zlib-level' ,'multifd-zstd-level', + { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, 'block-bitmap-mapping' ] } ## @@ -906,6 +911,13 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # +# @zero-copy-send: Controls behavior on sending memory pages on migration. +# When true, enables a zero-copy mechanism for sending +# memory pages, if host supports it. +# Requires that QEMU be permitted to use locked memory +# for guest RAM pages. +# Defaults to false. (Since 7.1) +# # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such # aliases may for example be the corresponding names on the @@ -960,6 +972,7 @@ '*multifd-compression': 'MultiFDCompression', '*multifd-zlib-level': 'uint8', '*multifd-zstd-level': 'uint8', + '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } ## @@ -1106,6 +1119,13 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # +# @zero-copy-send: Controls behavior on sending memory pages on migration. +# When true, enables a zero-copy mechanism for sending +# memory pages, if host supports it. +# Requires that QEMU be permitted to use locked memory +# for guest RAM pages. +# Defaults to false. (Since 7.1) +# # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such # aliases may for example be the corresponding names on the @@ -1158,6 +1178,7 @@ '*multifd-compression': 'MultiFDCompression', '*multifd-zlib-level': 'uint8', '*multifd-zstd-level': 'uint8', + '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } ## @@ -1194,10 +1215,10 @@ # ask the client to automatically reconnect using the new parameters # once migration finished successfully. Only implemented for SPICE. # -# @protocol: must be "spice" -# @hostname: migration target hostname -# @port: spice tcp port for plaintext channels -# @tls-port: spice tcp port for tls-secured channels +# @protocol: must be "spice" +# @hostname: migration target hostname +# @port: spice tcp port for plaintext channels +# @tls-port: spice tcp port for tls-secured channels # @cert-subject: server certificate subject # # Since: 0.14 @@ -1422,7 +1443,9 @@ # @state: The state the migration is currently expected to be in # # Returns: nothing on success +# # Since: 2.11 +# # Example: # # -> { "execute": "migrate-continue" , "arguments": @@ -1736,6 +1759,7 @@ # Since: 4.2 # # Example: +# # <- { "event": "UNPLUG_PRIMARY", # "data": { "device-id": "hostdev0" }, # "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } @@ -1754,7 +1778,6 @@ # @dirty-rate: dirty rate. # # Since: 6.2 -# ## { 'struct': 'DirtyRateVcpu', 'data': { 'id': 'int', 'dirty-rate': 'int64' } } @@ -1771,7 +1794,6 @@ # @measured: the dirtyrate thread has measured and results are available. # # Since: 5.2 -# ## { 'enum': 'DirtyRateStatus', 'data': [ 'unstarted', 'measuring', 'measured'] } @@ -1788,7 +1810,6 @@ # @dirty-bitmap: calculate dirtyrate by dirty bitmap. # # Since: 6.2 -# ## { 'enum': 'DirtyRateMeasureMode', 'data': ['page-sampling', 'dirty-ring', 'dirty-bitmap'] } @@ -1818,7 +1839,6 @@ # mode specified (Since 6.2) # # Since: 5.2 -# ## { 'struct': 'DirtyRateInfo', 'data': {'*dirty-rate': 'int64', @@ -1845,6 +1865,7 @@ # Since: 5.2 # # Example: +# # {"execute": "calc-dirty-rate", "arguments": {"calc-time": 1, # 'sample-pages': 512} } # diff --git a/qapi/misc-target.json b/qapi/misc-target.json index ed4a468aab..4944c0528f 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -21,7 +21,6 @@ { 'command': 'rtc-reset-reinjection', 'if': 'TARGET_I386' } - ## # @SevState: # @@ -101,7 +100,6 @@ { 'command': 'query-sev', 'returns': 'SevInfo', 'if': 'TARGET_I386' } - ## # @SevLaunchMeasureInfo: # @@ -110,7 +108,6 @@ # @data: the measurement value encoded in base64 # # Since: 2.12 -# ## { 'struct': 'SevLaunchMeasureInfo', 'data': {'data': 'str'}, 'if': 'TARGET_I386' } @@ -133,16 +130,15 @@ { 'command': 'query-sev-launch-measure', 'returns': 'SevLaunchMeasureInfo', 'if': 'TARGET_I386' } - ## # @SevCapability: # # The struct describes capability for a Secure Encrypted Virtualization # feature. # -# @pdh: Platform Diffie-Hellman key (base64 encoded) +# @pdh: Platform Diffie-Hellman key (base64 encoded) # -# @cert-chain: PDH certificate chain (base64 encoded) +# @cert-chain: PDH certificate chain (base64 encoded) # # @cpu0-id: Unique ID of CPU0 (base64 encoded) (since 7.1) # @@ -194,7 +190,6 @@ # @gpa: the guest physical address where secret will be injected. # # Since: 6.0 -# ## { 'command': 'sev-inject-launch-secret', 'data': { 'packet-header': 'str', 'secret': 'str', '*gpa': 'uint64' }, @@ -206,8 +201,7 @@ # The struct describes attestation report for a Secure Encrypted # Virtualization feature. # -# @data: guest attestation report (base64 encoded) -# +# @data: guest attestation report (base64 encoded) # # Since: 6.1 ## @@ -307,7 +301,6 @@ { 'command': 'query-gic-capabilities', 'returns': ['GICCapability'], 'if': 'TARGET_ARM' } - ## # @SGXEPCSection: # diff --git a/qapi/misc.json b/qapi/misc.json index b83cc39029..45344483cd 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -136,7 +136,7 @@ # # Stop all guest VCPU execution. # -# Since: 0.14 +# Since: 0.14 # # Notes: This function will succeed even if the guest is already in the stopped # state. In "inmigrate" state, it will ensure that the guest @@ -156,9 +156,9 @@ # # Resume guest VCPU execution. # -# Since: 0.14 +# Since: 0.14 # -# Returns: If successful, nothing +# Returns: If successful, nothing # # Notes: This command will succeed if the guest is currently running. It # will also succeed if the guest is in the "inmigrate" state; in @@ -188,7 +188,7 @@ # Features: # @unstable: This command is experimental. # -# Since 3.0 +# Since: 3.0 # # Returns: nothing # diff --git a/qapi/net.json b/qapi/net.json index b92f3f5fb4..d6f7cfd4d6 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -453,6 +453,120 @@ '*queues': 'int' } } ## +# @NetdevVmnetHostOptions: +# +# vmnet (host mode) network backend. +# +# Allows the vmnet interface to communicate with other vmnet +# interfaces that are in host mode and also with the host. +# +# @start-address: The starting IPv4 address to use for the interface. +# Must be in the private IP range (RFC 1918). Must be +# specified along with @end-address and @subnet-mask. +# This address is used as the gateway address. The +# subsequent address up to and including end-address are +# placed in the DHCP pool. +# +# @end-address: The DHCP IPv4 range end address to use for the +# interface. Must be in the private IP range (RFC 1918). +# Must be specified along with @start-address and +# @subnet-mask. +# +# @subnet-mask: The IPv4 subnet mask to use on the interface. Must +# be specified along with @start-address and @subnet-mask. +# +# @isolated: Enable isolation for this interface. Interface isolation +# ensures that vmnet interface is not able to communicate +# with any other vmnet interfaces. Only communication with +# host is allowed. Requires at least macOS Big Sur 11.0. +# +# @net-uuid: The identifier (UUID) to uniquely identify the isolated +# network vmnet interface should be added to. If +# set, no DHCP service is provided for this interface and +# network communication is allowed only with other interfaces +# added to this network identified by the UUID. Requires +# at least macOS Big Sur 11.0. +# +# Since: 7.1 +## +{ 'struct': 'NetdevVmnetHostOptions', + 'data': { + '*start-address': 'str', + '*end-address': 'str', + '*subnet-mask': 'str', + '*isolated': 'bool', + '*net-uuid': 'str' }, + 'if': 'CONFIG_VMNET' } + +## +# @NetdevVmnetSharedOptions: +# +# vmnet (shared mode) network backend. +# +# Allows traffic originating from the vmnet interface to reach the +# Internet through a network address translator (NAT). +# The vmnet interface can communicate with the host and with +# other shared mode interfaces on the same subnet. If no DHCP +# settings, subnet mask and IPv6 prefix specified, the interface can +# communicate with any of other interfaces in shared mode. +# +# @start-address: The starting IPv4 address to use for the interface. +# Must be in the private IP range (RFC 1918). Must be +# specified along with @end-address and @subnet-mask. +# This address is used as the gateway address. The +# subsequent address up to and including end-address are +# placed in the DHCP pool. +# +# @end-address: The DHCP IPv4 range end address to use for the +# interface. Must be in the private IP range (RFC 1918). +# Must be specified along with @start-address and @subnet-mask. +# +# @subnet-mask: The IPv4 subnet mask to use on the interface. Must +# be specified along with @start-address and @subnet-mask. +# +# @isolated: Enable isolation for this interface. Interface isolation +# ensures that vmnet interface is not able to communicate +# with any other vmnet interfaces. Only communication with +# host is allowed. Requires at least macOS Big Sur 11.0. +# +# @nat66-prefix: The IPv6 prefix to use into guest network. Must be a +# unique local address i.e. start with fd00::/8 and have +# length of 64. +# +# Since: 7.1 +## +{ 'struct': 'NetdevVmnetSharedOptions', + 'data': { + '*start-address': 'str', + '*end-address': 'str', + '*subnet-mask': 'str', + '*isolated': 'bool', + '*nat66-prefix': 'str' }, + 'if': 'CONFIG_VMNET' } + +## +# @NetdevVmnetBridgedOptions: +# +# vmnet (bridged mode) network backend. +# +# Bridges the vmnet interface with a physical network interface. +# +# @ifname: The name of the physical interface to be bridged. +# +# @isolated: Enable isolation for this interface. Interface isolation +# ensures that vmnet interface is not able to communicate +# with any other vmnet interfaces. Only communication with +# host is allowed. Requires at least macOS Big Sur 11.0. +# +# Since: 7.1 +## +{ 'struct': 'NetdevVmnetBridgedOptions', + 'data': { + 'ifname': 'str', + '*isolated': 'bool' }, + 'if': 'CONFIG_VMNET' } + +## # @NetClientDriver: # # Available netdev drivers. @@ -460,10 +574,16 @@ # Since: 2.7 # # @vhost-vdpa since 5.1 +# @vmnet-host since 7.1 +# @vmnet-shared since 7.1 +# @vmnet-bridged since 7.1 ## { 'enum': 'NetClientDriver', 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', - 'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] } + 'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa', + { 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' }, + { 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' }, + { 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] } ## # @Netdev: @@ -477,6 +597,9 @@ # Since: 1.2 # # 'l2tpv3' - since 2.1 +# 'vmnet-host' - since 7.1 +# 'vmnet-shared' - since 7.1 +# 'vmnet-bridged' - since 7.1 ## { 'union': 'Netdev', 'base': { 'id': 'str', 'type': 'NetClientDriver' }, @@ -492,7 +615,13 @@ 'hubport': 'NetdevHubPortOptions', 'netmap': 'NetdevNetmapOptions', 'vhost-user': 'NetdevVhostUserOptions', - 'vhost-vdpa': 'NetdevVhostVDPAOptions' } } + 'vhost-vdpa': 'NetdevVhostVDPAOptions', + 'vmnet-host': { 'type': 'NetdevVmnetHostOptions', + 'if': 'CONFIG_VMNET' }, + 'vmnet-shared': { 'type': 'NetdevVmnetSharedOptions', + 'if': 'CONFIG_VMNET' }, + 'vmnet-bridged': { 'type': 'NetdevVmnetBridgedOptions', + 'if': 'CONFIG_VMNET' } } } ## # @RxState: diff --git a/qapi/pragma.json b/qapi/pragma.json index e6a021c19c..7f810b0e97 100644 --- a/qapi/pragma.json +++ b/qapi/pragma.json @@ -6,7 +6,7 @@ # Whitelists to permit QAPI rule violations; think twice before you # add to them! { 'pragma': { - # Commands allowed to return a non-dictionary: + # Command names containing '_' 'command-name-exceptions': [ 'add_client', 'block_resize', @@ -24,6 +24,7 @@ 'system_powerdown', 'system_reset', 'system_wakeup' ], + # Commands allowed to return a non-dictionary 'command-returns-exceptions': [ 'human-monitor-command', 'qom-get', diff --git a/qapi/replay.json b/qapi/replay.json index 351898f60d..729470300d 100644 --- a/qapi/replay.json +++ b/qapi/replay.json @@ -40,7 +40,6 @@ # @icount: current number of executed instructions. # # Since: 5.2 -# ## { 'struct': 'ReplayInfo', 'data': { 'mode': 'ReplayMode', '*filename': 'str', 'icount': 'int' } } diff --git a/qapi/run-state.json b/qapi/run-state.json index 8124220bd9..6e2162d7b3 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -104,7 +104,7 @@ # # @status: the virtual machine @RunState # -# Since: 0.14 +# Since: 0.14 # # Notes: @singlestep is enabled through the GDB stub ## @@ -118,7 +118,7 @@ # # Returns: @StatusInfo reflecting all VCPUs # -# Since: 0.14 +# Since: 0.14 # # Example: # @@ -348,7 +348,7 @@ # # @poweroff: Shutdown the VM and exit # -# @pause: pause the VM# +# @pause: pause the VM # # Since: 6.0 ## @@ -592,13 +592,11 @@ # @guest: memory failure at guest memory, # # Since: 5.2 -# ## { 'enum': 'MemoryFailureRecipient', 'data': [ 'hypervisor', 'guest' ] } - ## # @MemoryFailureAction: # @@ -619,7 +617,6 @@ # to handle memory failures. # # Since: 5.2 -# ## { 'enum': 'MemoryFailureAction', 'data': [ 'ignore', @@ -639,7 +636,6 @@ # failure was still in progress. # # Since: 5.2 -# ## { 'struct': 'MemoryFailureFlags', 'data': { 'action-required': 'bool', diff --git a/qapi/sockets.json b/qapi/sockets.json index fccc38584b..bad74e34d3 100644 --- a/qapi/sockets.json +++ b/qapi/sockets.json @@ -167,9 +167,9 @@ # # Available SocketAddress types # -# @inet: Internet address +# @inet: Internet address # -# @unix: Unix domain socket +# @unix: Unix domain socket # # @vsock: VMCI address # @@ -189,7 +189,7 @@ # Captures the address of a socket, which could also be a named file # descriptor # -# @type: Transport type +# @type: Transport type # # Since: 2.9 ## diff --git a/qapi/ui.json b/qapi/ui.json index 059302a5ef..11a827d10f 100644 --- a/qapi/ui.json +++ b/qapi/ui.json @@ -15,7 +15,6 @@ # Display protocols which support changing password options. # # Since: 7.0 -# ## { 'enum': 'DisplayProtocol', 'data': [ 'vnc', 'spice' ] } @@ -32,7 +31,6 @@ # @disconnect: disconnect existing clients # # Since: 7.0 -# ## { 'enum': 'SetPasswordAction', 'data': [ 'keep', 'fail', 'disconnect' ] } @@ -52,7 +50,6 @@ # For VNC, only 'keep' is currently implemented. # # Since: 7.0 -# ## { 'union': 'SetPasswordOptions', 'base': { 'protocol': 'DisplayProtocol', @@ -70,7 +67,6 @@ # Defaults to the first. # # Since: 7.0 -# ## { 'struct': 'SetPasswordOptionsVnc', 'data': { '*display': 'str' } } @@ -115,7 +111,6 @@ # sure you are on the same machine as the QEMU instance. # # Since: 7.0 -# ## { 'union': 'ExpirePasswordOptions', 'base': { 'protocol': 'DisplayProtocol', @@ -132,9 +127,7 @@ # Defaults to the first. # # Since: 7.0 -# ## - { 'struct': 'ExpirePasswordOptionsVnc', 'data': { '*display': 'str' } } @@ -167,7 +160,6 @@ # @ppm: PPM format # # Since: 7.1 -# ## { 'enum': 'ImageFormat', 'data': ['ppm', 'png'] } @@ -902,7 +894,6 @@ # are effectively synonyms. # # Since: 1.3 -# ## { 'enum': 'QKeyCode', 'data': [ 'unmapped', @@ -1027,8 +1018,8 @@ # # Keyboard input event. # -# @key: Which key this event is for. -# @down: True for key-down and false for key-up events. +# @key: Which key this event is for. +# @down: True for key-down and false for key-up events. # # Since: 2.0 ## @@ -1042,7 +1033,7 @@ # Pointer button input event. # # @button: Which button this event is for. -# @down: True for key-down and false for key-up events. +# @down: True for key-down and false for key-up events. # # Since: 2.0 ## @@ -1206,7 +1197,6 @@ # Since 3.1 # # Since: 2.12 -# ## { 'struct' : 'DisplayGTK', 'data' : { '*grab-on-hover' : 'bool', @@ -1221,7 +1211,6 @@ # available node on the host. # # Since: 3.1 -# ## { 'struct' : 'DisplayEGLHeadless', 'data' : { '*rendernode' : 'str' } } @@ -1242,7 +1231,6 @@ # @audiodev: Use the specified DBus audiodev to export audio. # # Since: 7.0 -# ## { 'struct' : 'DisplayDBus', 'data' : { '*rendernode' : 'str', @@ -1250,21 +1238,20 @@ '*p2p': 'bool', '*audiodev': 'str' } } - ## - # @DisplayGLMode: - # - # Display OpenGL mode. - # - # @off: Disable OpenGL (default). - # @on: Use OpenGL, pick context type automatically. - # Would better be named 'auto' but is called 'on' for backward - # compatibility with bool type. - # @core: Use OpenGL with Core (desktop) Context. - # @es: Use OpenGL with ES (embedded systems) Context. - # - # Since: 3.0 - # - ## +## +# @DisplayGLMode: +# +# Display OpenGL mode. +# +# @off: Disable OpenGL (default). +# @on: Use OpenGL, pick context type automatically. +# Would better be named 'auto' but is called 'on' for backward +# compatibility with bool type. +# @core: Use OpenGL with Core (desktop) Context. +# @es: Use OpenGL with ES (embedded systems) Context. +# +# Since: 3.0 +## { 'enum' : 'DisplayGLMode', 'data' : [ 'off', 'on', 'core', 'es' ] } @@ -1273,10 +1260,9 @@ # # Curses display options. # -# @charset: Font charset used by guest (default: CP437). +# @charset: Font charset used by guest (default: CP437). # # Since: 4.0 -# ## { 'struct' : 'DisplayCurses', 'data' : { '*charset' : 'str' } } @@ -1346,7 +1332,6 @@ # @dbus: Start a D-Bus service for the display. (Since 7.0) # # Since: 2.12 -# ## { 'enum' : 'DisplayType', 'data' : [ @@ -1368,15 +1353,13 @@ # # Display (user interface) options. # -# @type: Which DisplayType qemu should use. -# @full-screen: Start user interface in fullscreen mode (default: off). -# @window-close: Allow to quit qemu with window close button (default: on). -# @show-cursor: Force showing the mouse cursor (default: off). -# (since: 5.0) -# @gl: Enable OpenGL support (default: off). +# @type: Which DisplayType qemu should use. +# @full-screen: Start user interface in fullscreen mode (default: off). +# @window-close: Allow to quit qemu with window close button (default: on). +# @show-cursor: Force showing the mouse cursor (default: off). (since: 5.0) +# @gl: Enable OpenGL support (default: off). # # Since: 2.12 -# ## { 'union' : 'DisplayOptions', 'base' : { 'type' : 'DisplayType', @@ -1403,7 +1386,6 @@ # Returns: @DisplayOptions # # Since: 3.1 -# ## { 'command': 'query-display-options', 'returns': 'DisplayOptions' } @@ -1416,7 +1398,6 @@ # @vnc: VNC display # # Since: 6.0 -# ## { 'enum': 'DisplayReloadType', 'data': ['vnc'] } @@ -1429,7 +1410,6 @@ # @tls-certs: reload tls certs or not. # # Since: 6.0 -# ## { 'struct': 'DisplayReloadOptionsVNC', 'data': { '*tls-certs': 'bool' } } @@ -1442,7 +1422,6 @@ # @type: Specify the display type. # # Since: 6.0 -# ## { 'union': 'DisplayReloadOptions', 'base': {'type': 'DisplayReloadType'}, @@ -1477,7 +1456,6 @@ # @vnc: VNC display # # Since: 7.1 -# ## { 'enum': 'DisplayUpdateType', 'data': ['vnc'] } @@ -1492,7 +1470,6 @@ # for websockets are not touched. # # Since: 7.1 -# ## { 'struct': 'DisplayUpdateOptionsVNC', 'data': { '*addresses': ['SocketAddress'] } } @@ -1505,7 +1482,6 @@ # @type: Specify the display type. # # Since: 7.1 -# ## { 'union': 'DisplayUpdateOptions', 'base': {'type': 'DisplayUpdateType'}, diff --git a/qemu-nbd.c b/qemu-nbd.c index 2382b5042a..0cd5aa6f02 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -1095,7 +1095,7 @@ int main(int argc, char **argv) bs->detect_zeroes = detect_zeroes; - nbd_server_is_qemu_nbd(true); + nbd_server_is_qemu_nbd(shared); export_opts = g_new(BlockExportOptions, 1); *export_opts = (BlockExportOptions) { diff --git a/qemu-options.hx b/qemu-options.hx index 796229c433..a664baaa18 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -467,6 +467,44 @@ SRST -numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8 ERST +DEF("cxl-fixed-memory-window", HAS_ARG, QEMU_OPTION_cxl_fixed_memory_window, + "-cxl-fixed-memory-window targets.0=firsttarget,targets.1=secondtarget,size=size[,interleave-granularity=granularity]\n", + QEMU_ARCH_ALL) +SRST +``-cxl-fixed-memory-window targets.0=firsttarget,targets.1=secondtarget,size=size[,interleave-granularity=granularity]`` + Define a CXL Fixed Memory Window (CFMW). + + Described in the CXL 2.0 ECN: CEDT CFMWS & QTG _DSM. + + They are regions of Host Physical Addresses (HPA) on a system which + may be interleaved across one or more CXL host bridges. The system + software will assign particular devices into these windows and + configure the downstream Host-managed Device Memory (HDM) decoders + in root ports, switch ports and devices appropriately to meet the + interleave requirements before enabling the memory devices. + + ``targets.X=firsttarget`` provides the mapping to CXL host bridges + which may be identified by the id provied in the -device entry. + Multiple entries are needed to specify all the targets when + the fixed memory window represents interleaved memory. X is the + target index from 0. + + ``size=size`` sets the size of the CFMW. This must be a multiple of + 256MiB. The region will be aligned to 256MiB but the location is + platform and configuration dependent. + + ``interleave-granularity=granularity`` sets the granularity of + interleave. Default 256KiB. Only 256KiB, 512KiB, 1024KiB, 2048KiB + 4096KiB, 8192KiB and 16384KiB granularities supported. + + Example: + + :: + + -cxl-fixed-memory-window targets.0=cxl.0,targets.1=cxl.1,size=128G,interleave-granularity=512k + +ERST + DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, "-add-fd fd=fd,set=set[,opaque=opaque]\n" " Add 'fd' to fd 'set'\n", QEMU_ARCH_ALL) @@ -661,6 +699,30 @@ SRST (deprecated) environment variables. ERST +DEF("audio", HAS_ARG, QEMU_OPTION_audio, + "-audio [driver=]driver,model=value[,prop[=value][,...]]\n" + " specifies the audio backend and device to use;\n" + " apart from 'model', options are the same as for -audiodev.\n" + " use '-audio model=help' to show possible devices.\n", + QEMU_ARCH_ALL) +SRST +``-audio [driver=]driver,model=value[,prop[=value][,...]]`` + This option is a shortcut for configuring both the guest audio + hardware and the host audio backend in one go. + The host backend options are the same as with the corresponding + ``-audiodev`` options below. The guest hardware model can be set with + ``model=modelname``. Use ``model=help`` to list the available device + types. + + The following two example do exactly the same, to show how ``-audio`` + can be used to shorten the command line length: + + .. parsed-literal:: + + |qemu_system| -audiodev pa,id=pa -device sb16,audiodev=pa + |qemu_system| -audio pa,model=sb16 +ERST + DEF("audiodev", HAS_ARG, QEMU_OPTION_audiodev, "-audiodev [driver=]driver,id=id[,prop[=value][,...]]\n" " specifies the audio backend to use\n" @@ -892,33 +954,6 @@ SRST ``qemu.wav``. ERST -DEF("soundhw", HAS_ARG, QEMU_OPTION_soundhw, - "-soundhw c1,... enable audio support\n" - " and only specified sound cards (comma separated list)\n" - " use '-soundhw help' to get the list of supported cards\n" - " use '-soundhw all' to enable all of them\n", QEMU_ARCH_ALL) -SRST -``-soundhw card1[,card2,...] or -soundhw all`` - Enable audio and selected sound hardware. Use 'help' to print all - available sound hardware. For example: - - .. parsed-literal:: - - |qemu_system_x86| -soundhw sb16,adlib disk.img - |qemu_system_x86| -soundhw es1370 disk.img - |qemu_system_x86| -soundhw ac97 disk.img - |qemu_system_x86| -soundhw hda disk.img - |qemu_system_x86| -soundhw all disk.img - |qemu_system_x86| -soundhw help - - Note that Linux's i810\_audio OSS kernel (for AC97) module might - require manually specifying clocking. - - :: - - modprobe i810_audio clocking=48000 -ERST - DEF("device", HAS_ARG, QEMU_OPTION_device, "-device driver[,prop[=value][,...]]\n" " add device (based on driver)\n" @@ -2761,6 +2796,25 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n" " configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n" #endif +#ifdef CONFIG_VMNET + "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n" + " [,start-address=addr,end-address=addr,subnet-mask=mask]\n" + " configure a vmnet network backend in host mode with ID 'str',\n" + " isolate this interface from others with 'isolated',\n" + " configure the address range and choose a subnet mask,\n" + " specify network UUID 'uuid' to disable DHCP and interact with\n" + " vmnet-host interfaces within this isolated network\n" + "-netdev vmnet-shared,id=str[,isolated=on|off][,nat66-prefix=addr]\n" + " [,start-address=addr,end-address=addr,subnet-mask=mask]\n" + " configure a vmnet network backend in shared mode with ID 'str',\n" + " configure the address range and choose a subnet mask,\n" + " set IPv6 ULA prefix (of length 64) to use for internal network,\n" + " isolate this interface from others with 'isolated'\n" + "-netdev vmnet-bridged,id=str,ifname=name[,isolated=on|off]\n" + " configure a vmnet network backend in bridged mode with ID 'str',\n" + " use 'ifname=name' to select a physical network interface to be bridged,\n" + " isolate this interface from others with 'isolated'\n" +#endif "-netdev hubport,id=str,hubid=n[,netdev=nd]\n" " configure a hub port on the hub with ID 'n'\n", QEMU_ARCH_ALL) DEF("nic", HAS_ARG, QEMU_OPTION_nic, @@ -2780,6 +2834,9 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic, #ifdef CONFIG_POSIX "vhost-user|" #endif +#ifdef CONFIG_VMNET + "vmnet-host|vmnet-shared|vmnet-bridged|" +#endif "socket][,option][,...][mac=macaddr]\n" " initialize an on-board / default host NIC (using MAC address\n" " macaddr) and connect it to the given host network backend\n" @@ -2802,6 +2859,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net, #ifdef CONFIG_NETMAP "netmap|" #endif +#ifdef CONFIG_VMNET + "vmnet-host|vmnet-shared|vmnet-bridged|" +#endif "socket][,option][,option][,...]\n" " old way to initialize a host network interface\n" " (use the -netdev option if possible instead)\n", QEMU_ARCH_ALL) diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index 4d8e506c9e..f989597b0c 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -950,7 +950,7 @@ # by device mapper # @smart: disk smart information (Since 7.1) # -# Since 5.2 +# Since: 5.2 ## { 'struct': 'GuestDiskInfo', 'data': {'name': 'str', 'partition': 'bool', '*dependencies': ['str'], diff --git a/qga/vss-win32/meson.build b/qga/vss-win32/meson.build index 26c5dd6e0e..9483ccd3b8 100644 --- a/qga/vss-win32/meson.build +++ b/qga/vss-win32/meson.build @@ -7,7 +7,7 @@ link_args = cc.get_supported_link_arguments([ qga_vss = shared_module( 'qga-vss', - ['requester.cpp', 'provider.cpp', 'install.cpp'], + ['requester.cpp', 'provider.cpp', 'install.cpp', genh], name_prefix: '', cpp_args: ['-Wno-unknown-pragmas', '-Wno-delete-non-virtual-dtor', '-Wno-non-virtual-dtor'], link_args: link_args, diff --git a/qga/vss-win32/requester.cpp b/qga/vss-win32/requester.cpp index 4513324dd2..b371affeab 100644 --- a/qga/vss-win32/requester.cpp +++ b/qga/vss-win32/requester.cpp @@ -354,12 +354,12 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset) if (FAILED(hr)) { err_set(errset, hr, "failed to add %S to snapshot set", volume_name_wchar); - delete volume_name_wchar; + delete[] volume_name_wchar; goto out; } num_mount_points++; - delete volume_name_wchar; + delete[] volume_name_wchar; } if (num_mount_points == 0) { diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4763d02ae7..d900d18048 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2831,8 +2831,8 @@ sub process { } # check for pointless casting of g_malloc return - if ($line =~ /\*\s*\)\s*g_(try)?(m|re)alloc(0?)(_n)?\b/) { - if ($2 == 'm') { + if ($line =~ /\*\s*\)\s*g_(try|)(m|re)alloc(0?)(_n)?\b/) { + if ($2 eq 'm') { ERROR("unnecessary cast may hide bugs, use g_$1new$3 instead\n" . $herecurr); } else { ERROR("unnecessary cast may hide bugs, use g_$1renew$3 instead\n" . $herecurr); diff --git a/scripts/device-crash-test b/scripts/device-crash-test index 4bfc68c008..a203b3fdea 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -93,6 +93,7 @@ ERROR_RULE_LIST = [ {'device':'pci-bridge', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. {'device':'pci-bridge-seat', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. {'device':'pxb', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. + {'device':'pxb-cxl', 'expected':True}, # pxb-cxl devices cannot reside on a PCI bus. {'device':'scsi-block', 'expected':True}, # drive property not set {'device':'scsi-generic', 'expected':True}, # drive property not set {'device':'scsi-hd', 'expected':True}, # drive property not set diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index f140040104..ce27f5e635 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -23,6 +23,7 @@ MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 MSR_IA32_VMX_VMFUNC = 0x491 +MSR_IA32_VMX_PROCBASED_CTLS3 = 0x492 class msr(object): def __init__(self): @@ -71,6 +72,13 @@ class Control(object): s = 'yes' print(' %-40s %s' % (self.bits[bit], s)) +# All 64 bits in the tertiary controls MSR are allowed-1 +class Allowed1Control(Control): + def read2(self, nr): + m = msr() + val = m.read(nr, 0) + return (0, val) + class Misc(object): def __init__(self, name, bits, msr): self.name = name @@ -135,6 +143,7 @@ controls = [ 12: 'RDTSC exiting', 15: 'CR3-load exiting', 16: 'CR3-store exiting', + 17: 'Activate tertiary controls', 19: 'CR8-load exiting', 20: 'CR8-store exiting', 21: 'Use TPR shadow', @@ -186,6 +195,14 @@ controls = [ cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, ), + Allowed1Control( + name = 'tertiary processor-based controls', + bits = { + 4: 'Enable IPI virtualization' + }, + cap_msr = MSR_IA32_VMX_PROCBASED_CTLS3, + ), + Control( name = 'VM-Exit controls', bits = { diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 5d2172bfb4..9b0d151dad 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -158,6 +158,7 @@ meson_options_help() { printf "%s\n" ' vhost-kernel vhost kernel backend support' printf "%s\n" ' vhost-net vhost-net kernel acceleration support' printf "%s\n" ' vhost-user vhost-user backend support' + printf "%s\n" ' vmnet vmnet.framework network backend support' printf "%s\n" ' vhost-user-blk-server' printf "%s\n" ' build vhost-user-blk server' printf "%s\n" ' vhost-vdpa vhost-vdpa kernel backend support' diff --git a/scripts/qapi/expr.py b/scripts/qapi/expr.py index 48578e1698..5a1782b57e 100644 --- a/scripts/qapi/expr.py +++ b/scripts/qapi/expr.py @@ -443,7 +443,7 @@ def check_features(features: Optional[object], check_keys(feat, info, source, ['name'], ['if']) check_name_is_str(feat['name'], info, source) source = "%s '%s'" % (source, feat['name']) - check_name_str(feat['name'], info, source) + check_name_lower(feat['name'], info, source) check_if(feat, info, source) diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c index 451c7631b7..3be52a98d5 100644 --- a/scsi/pr-manager-helper.c +++ b/scsi/pr-manager-helper.c @@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, iov.iov_base = (void *)buf; iov.iov_len = sz; n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, - nfds ? &fd : NULL, nfds, errp); + nfds ? &fd : NULL, nfds, 0, errp); if (n_written <= 0) { assert(n_written != QIO_CHANNEL_ERR_BLOCK); diff --git a/slirp b/slirp -Subproject a88d9ace234a24ce1c17189642ef9104799425e +Subproject 9d59bb775d6294c8b447a88512f7bb43f12a25a diff --git a/softmmu/bootdevice.c b/softmmu/bootdevice.c index c0713bfa9f..2106f1026f 100644 --- a/softmmu/bootdevice.c +++ b/softmmu/bootdevice.c @@ -268,7 +268,8 @@ char *get_boot_devices_list(size_t *size) *size = total; - if (boot_strict && *size > 0) { + if (current_machine->boot_config.has_strict && + current_machine->boot_config.strict && *size > 0) { list[total-1] = '\n'; list = g_realloc(list, total + 5); memcpy(&list[total], "HALT", 5); diff --git a/softmmu/globals.c b/softmmu/globals.c index 98b64e0492..916bc12e2b 100644 --- a/softmmu/globals.c +++ b/softmmu/globals.c @@ -54,8 +54,6 @@ int alt_grab; int ctrl_grab; unsigned int nb_prom_envs; const char *prom_envs[MAX_PROM_ENVS]; -int boot_menu; -bool boot_strict; uint8_t *boot_splash_filedata; int only_migratable; /* turn it off unless user states otherwise */ int icount_align_option; diff --git a/softmmu/vl.c b/softmmu/vl.c index 488cc4d09e..84a31eba76 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -93,6 +93,7 @@ #include "qemu/config-file.h" #include "qemu/qemu-options.h" #include "qemu/main-loop.h" +#include "hw/cxl/cxl.h" #ifdef CONFIG_VIRTFS #include "fsdev/qemu-fsdev.h" #endif @@ -116,8 +117,11 @@ #include "crypto/init.h" #include "sysemu/replay.h" #include "qapi/qapi-events-run-state.h" +#include "qapi/qapi-types-audio.h" +#include "qapi/qapi-visit-audio.h" #include "qapi/qapi-visit-block-core.h" #include "qapi/qapi-visit-compat.h" +#include "qapi/qapi-visit-machine.h" #include "qapi/qapi-visit-ui.h" #include "qapi/qapi-commands-block-core.h" #include "qapi/qapi-commands-migration.h" @@ -143,6 +147,12 @@ typedef struct BlockdevOptionsQueueEntry { typedef QSIMPLEQ_HEAD(, BlockdevOptionsQueueEntry) BlockdevOptionsQueue; +typedef struct CXLFMWOptionQueueEntry { + CXLFixedMemoryWindowOptions *opts; + Location loc; + QSIMPLEQ_ENTRY(CXLFMWOptionQueueEntry) entry; +} CXLFMWOptionQueueEntry; + typedef struct ObjectOption { ObjectOptions *opts; QTAILQ_ENTRY(ObjectOption) next; @@ -159,19 +169,20 @@ static const char *mem_path; static const char *incoming; static const char *loadvm; static const char *accelerators; +static bool have_custom_ram_size; +static const char *ram_memdev_id; static QDict *machine_opts_dict; static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts); static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts); -static ram_addr_t maxram_size; -static uint64_t ram_slots; static int display_remote; static int snapshot; static bool preconfig_requested; static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list); static BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue); +static QSIMPLEQ_HEAD(, CXLFMWOptionQueueEntry) CXLFMW_opts = + QSIMPLEQ_HEAD_INITIALIZER(CXLFMW_opts); static bool nographic = false; static int mem_prealloc; /* force preallocation of physical target memory */ -static ram_addr_t ram_size; static const char *vga_model = NULL; static DisplayOptions dpy; static int num_serial_hds; @@ -1154,6 +1165,24 @@ static void parse_display(const char *p) } } +static void parse_cxl_fixed_memory_window(const char *optarg) +{ + CXLFMWOptionQueueEntry *cfmws_entry; + Visitor *v; + + v = qobject_input_visitor_new_str(optarg, "cxl-fixed-memory-window", + &error_fatal); + cfmws_entry = g_new(CXLFMWOptionQueueEntry, 1); + visit_type_CXLFixedMemoryWindowOptions(v, NULL, &cfmws_entry->opts, + &error_fatal); + if (!cfmws_entry->opts) { + exit(1); + } + visit_free(v); + loc_save(&cfmws_entry->loc); + QSIMPLEQ_INSERT_TAIL(&CXLFMW_opts, cfmws_entry, entry); +} + static inline bool nonempty_str(const char *str) { return str && *str; @@ -1736,6 +1765,7 @@ static void keyval_dashify(QDict *qdict, Error **errp) static void qemu_apply_legacy_machine_options(QDict *qdict) { const char *value; + QObject *prop; keyval_dashify(qdict, &error_fatal); @@ -1768,6 +1798,26 @@ static void qemu_apply_legacy_machine_options(QDict *qdict) false); qdict_del(qdict, "kernel-irqchip"); } + + value = qdict_get_try_str(qdict, "memory-backend"); + if (value) { + if (mem_path) { + error_report("'-mem-path' can't be used together with" + "'-machine memory-backend'"); + exit(EXIT_FAILURE); + } + + /* Resolved later. */ + ram_memdev_id = g_strdup(value); + qdict_del(qdict, "memory-backend"); + } + + prop = qdict_get(qdict, "memory"); + if (prop) { + have_custom_ram_size = + qobject_type(prop) == QTYPE_QDICT && + qdict_haskey(qobject_to(QDict, prop), "size"); + } } static void object_option_foreach_add(bool (*type_opt_predicate)(const char *)) @@ -1884,38 +1934,7 @@ static bool object_create_early(const char *type) static void qemu_apply_machine_options(QDict *qdict) { - MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); - const char *boot_order = NULL; - const char *boot_once = NULL; - QemuOpts *opts; - object_set_properties_from_keyval(OBJECT(current_machine), qdict, false, &error_fatal); - current_machine->ram_size = ram_size; - current_machine->maxram_size = maxram_size; - current_machine->ram_slots = ram_slots; - - opts = qemu_opts_find(qemu_find_opts("boot-opts"), NULL); - if (opts) { - boot_order = qemu_opt_get(opts, "order"); - if (boot_order) { - validate_bootdevices(boot_order, &error_fatal); - } - - boot_once = qemu_opt_get(opts, "once"); - if (boot_once) { - validate_bootdevices(boot_once, &error_fatal); - } - - boot_menu = qemu_opt_get_bool(opts, "menu", boot_menu); - boot_strict = qemu_opt_get_bool(opts, "strict", false); - } - - if (!boot_order) { - boot_order = machine_class->default_boot_order; - } - - current_machine->boot_order = boot_order; - current_machine->boot_once = boot_once; if (semihosting_enabled() && !semihosting_get_argc()) { /* fall back to the -kernel/-append */ @@ -2026,125 +2045,81 @@ static void qemu_create_late_backends(void) qemu_semihosting_console_init(); } -static bool have_custom_ram_size(void) +static void cxl_set_opts(void) { - QemuOpts *opts = qemu_find_opts_singleton("memory"); - return !!qemu_opt_get_size(opts, "size", 0); + while (!QSIMPLEQ_EMPTY(&CXLFMW_opts)) { + CXLFMWOptionQueueEntry *cfmws_entry = QSIMPLEQ_FIRST(&CXLFMW_opts); + + loc_restore(&cfmws_entry->loc); + QSIMPLEQ_REMOVE_HEAD(&CXLFMW_opts, entry); + cxl_fixed_memory_window_config(current_machine, cfmws_entry->opts, + &error_fatal); + qapi_free_CXLFixedMemoryWindowOptions(cfmws_entry->opts); + g_free(cfmws_entry); + } } static void qemu_resolve_machine_memdev(void) { - if (current_machine->ram_memdev_id) { + if (ram_memdev_id) { Object *backend; ram_addr_t backend_size; - backend = object_resolve_path_type(current_machine->ram_memdev_id, + backend = object_resolve_path_type(ram_memdev_id, TYPE_MEMORY_BACKEND, NULL); if (!backend) { - error_report("Memory backend '%s' not found", - current_machine->ram_memdev_id); + error_report("Memory backend '%s' not found", ram_memdev_id); exit(EXIT_FAILURE); } - backend_size = object_property_get_uint(backend, "size", &error_abort); - if (have_custom_ram_size() && backend_size != ram_size) { - error_report("Size specified by -m option must match size of " - "explicitly specified 'memory-backend' property"); - exit(EXIT_FAILURE); - } - if (mem_path) { - error_report("'-mem-path' can't be used together with" - "'-machine memory-backend'"); - exit(EXIT_FAILURE); - } - ram_size = backend_size; - } - - if (!xen_enabled()) { - /* On 32-bit hosts, QEMU is limited by virtual address space */ - if (ram_size > (2047 << 20) && HOST_LONG_BITS == 32) { - error_report("at most 2047 MB RAM can be simulated"); - exit(1); + if (!have_custom_ram_size) { + backend_size = object_property_get_uint(backend, "size", &error_abort); + current_machine->ram_size = backend_size; } + object_property_set_link(OBJECT(current_machine), + "memory-backend", backend, &error_fatal); } } -static void set_memory_options(MachineClass *mc) +static void parse_memory_options(const char *arg) { - uint64_t sz; + QemuOpts *opts; + QDict *dict, *prop; const char *mem_str; - const ram_addr_t default_ram_size = mc->default_ram_size; - QemuOpts *opts = qemu_find_opts_singleton("memory"); - Location loc; - loc_push_none(&loc); - qemu_opts_loc_restore(opts); + opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), arg, true); + if (!opts) { + exit(EXIT_FAILURE); + } - sz = 0; - mem_str = qemu_opt_get(opts, "size"); - if (mem_str) { + prop = qdict_new(); + + if (qemu_opt_get_size(opts, "size", 0) != 0) { + mem_str = qemu_opt_get(opts, "size"); if (!*mem_str) { error_report("missing 'size' option value"); exit(EXIT_FAILURE); } - sz = qemu_opt_get_size(opts, "size", ram_size); - /* Fix up legacy suffix-less format */ if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) { - uint64_t overflow_check = sz; - - sz *= MiB; - if (sz / MiB != overflow_check) { - error_report("too large 'size' option value"); - exit(EXIT_FAILURE); - } + g_autofree char *mib_str = g_strdup_printf("%sM", mem_str); + qdict_put_str(prop, "size", mib_str); + } else { + qdict_put_str(prop, "size", mem_str); } } - /* backward compatibility behaviour for case "-m 0" */ - if (sz == 0) { - sz = default_ram_size; - } - - sz = QEMU_ALIGN_UP(sz, 8192); - if (mc->fixup_ram_size) { - sz = mc->fixup_ram_size(sz); - } - ram_size = sz; - if (ram_size != sz) { - error_report("ram size too large"); - exit(EXIT_FAILURE); - } - - maxram_size = ram_size; - if (qemu_opt_get(opts, "maxmem")) { - uint64_t slots; - - sz = qemu_opt_get_size(opts, "maxmem", 0); - slots = qemu_opt_get_number(opts, "slots", 0); - if (sz < ram_size) { - error_report("invalid value of -m option maxmem: " - "maximum memory size (0x%" PRIx64 ") must be at least " - "the initial memory size (0x" RAM_ADDR_FMT ")", - sz, ram_size); - exit(EXIT_FAILURE); - } else if (slots && sz == ram_size) { - error_report("invalid value of -m option maxmem: " - "memory slots were specified but maximum memory size " - "(0x%" PRIx64 ") is equal to the initial memory size " - "(0x" RAM_ADDR_FMT ")", sz, ram_size); - exit(EXIT_FAILURE); - } - - maxram_size = sz; - ram_slots = slots; - } else if (qemu_opt_get(opts, "slots")) { - error_report("invalid -m option value: missing 'maxmem' option"); - exit(EXIT_FAILURE); + qdict_put_str(prop, "max-size", qemu_opt_get(opts, "maxmem")); + } + if (qemu_opt_get(opts, "slots")) { + qdict_put_str(prop, "slots", qemu_opt_get(opts, "slots")); } - loc_pop(&loc); + dict = qdict_new(); + qdict_put(dict, "memory", prop); + keyval_merge(machine_opts_dict, dict, &error_fatal); + qobject_unref(dict); } static void qemu_create_machine(QDict *qdict) @@ -2152,8 +2127,6 @@ static void qemu_create_machine(QDict *qdict) MachineClass *machine_class = select_machine(qdict, &error_fatal); object_set_machine_compat_props(machine_class->compat_props); - set_memory_options(machine_class); - current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class))); object_property_add_child(object_get_root(), "machine", OBJECT(current_machine)); @@ -2212,7 +2185,9 @@ static bool is_qemuopts_group(const char *group) { if (g_str_equal(group, "object") || g_str_equal(group, "machine") || - g_str_equal(group, "smp-opts")) { + g_str_equal(group, "smp-opts") || + g_str_equal(group, "boot-opts") || + g_str_equal(group, "memory")) { return false; } return true; @@ -2234,6 +2209,10 @@ static void qemu_record_config_group(const char *group, QDict *dict, keyval_merge(machine_opts_dict, dict, errp); } else if (g_str_equal(group, "smp-opts")) { machine_merge_property("smp", dict, &error_fatal); + } else if (g_str_equal(group, "boot-opts")) { + machine_merge_property("boot", dict, &error_fatal); + } else if (g_str_equal(group, "memory")) { + machine_merge_property("memory", dict, &error_fatal); } else { abort(); } @@ -2440,27 +2419,6 @@ static void configure_accelerators(const char *progname) } } -static void create_default_memdev(MachineState *ms, const char *path) -{ - Object *obj; - MachineClass *mc = MACHINE_GET_CLASS(ms); - - obj = object_new(path ? TYPE_MEMORY_BACKEND_FILE : TYPE_MEMORY_BACKEND_RAM); - if (path) { - object_property_set_str(obj, "mem-path", path, &error_fatal); - } - object_property_set_int(obj, "size", ms->ram_size, &error_fatal); - object_property_add_child(object_get_objects_root(), mc->default_ram_id, - obj); - /* Ensure backend's memory region name is equal to mc->default_ram_id */ - object_property_set_bool(obj, "x-use-canonical-path-for-ramblock-id", - false, &error_fatal); - user_creatable_complete(USER_CREATABLE(obj), &error_fatal); - object_unref(obj); - object_property_set_str(OBJECT(ms), "memory-backend", mc->default_ram_id, - &error_fatal); -} - static void qemu_validate_options(const QDict *machine_opts) { const char *kernel_filename = qdict_get_try_str(machine_opts, "kernel"); @@ -2645,18 +2603,11 @@ static void qemu_init_displays(void) static void qemu_init_board(void) { - MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); - - if (machine_class->default_ram_id && current_machine->ram_size && - numa_uses_legacy_mem() && !current_machine->ram_memdev_id) { - create_default_memdev(current_machine, mem_path); - } - /* process plugin before CPUs are created, but once -smp has been parsed */ qemu_plugin_load_list(&plugin_list, &error_fatal); /* From here on we enter MACHINE_PHASE_INITIALIZED. */ - machine_run_board_init(current_machine); + machine_run_board_init(current_machine, mem_path, &error_fatal); drive_check_orphaned(); @@ -2754,6 +2705,7 @@ void qmp_x_exit_preconfig(Error **errp) qemu_init_board(); qemu_create_cli_devices(); + cxl_fixed_memory_window_link_targets(errp); qemu_machine_creation_done(); if (loadvm) { @@ -2934,6 +2886,9 @@ void qemu_init(int argc, char **argv, char **envp) exit(1); } break; + case QEMU_OPTION_cxl_fixed_memory_window: + parse_cxl_fixed_memory_window(optarg); + break; case QEMU_OPTION_display: parse_display(optarg); break; @@ -2979,11 +2934,7 @@ void qemu_init(int argc, char **argv, char **envp) drive_add(IF_DEFAULT, 2, optarg, CDROM_OPTS); break; case QEMU_OPTION_boot: - opts = qemu_opts_parse_noisily(qemu_find_opts("boot-opts"), - optarg, true); - if (!opts) { - exit(1); - } + machine_parse_property_opt(qemu_find_opts("boot-opts"), "boot", optarg); break; case QEMU_OPTION_fda: case QEMU_OPTION_fdb: @@ -3027,9 +2978,33 @@ void qemu_init(int argc, char **argv, char **envp) case QEMU_OPTION_audiodev: audio_parse_option(optarg); break; - case QEMU_OPTION_soundhw: - select_soundhw (optarg); + case QEMU_OPTION_audio: { + QDict *dict = keyval_parse(optarg, "driver", NULL, &error_fatal); + char *model; + Audiodev *dev = NULL; + Visitor *v; + + if (!qdict_haskey(dict, "id")) { + qdict_put_str(dict, "id", "audiodev0"); + } + if (!qdict_haskey(dict, "model")) { + error_setg(&error_fatal, "Parameter 'model' is missing"); + } + model = g_strdup(qdict_get_str(dict, "model")); + qdict_del(dict, "model"); + if (is_help_option(model)) { + show_valid_soundhw(); + exit(0); + } + v = qobject_input_visitor_new_keyval(QOBJECT(dict)); + qobject_unref(dict); + visit_type_Audiodev(v, NULL, &dev, &error_fatal); + visit_free(v); + audio_define(dev); + select_soundhw(model, dev->id); + g_free(model); break; + } case QEMU_OPTION_h: help(0); break; @@ -3038,11 +3013,7 @@ void qemu_init(int argc, char **argv, char **envp) exit(0); break; case QEMU_OPTION_m: - opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), - optarg, true); - if (!opts) { - exit(EXIT_FAILURE); - } + parse_memory_options(optarg); break; #ifdef CONFIG_TPM case QEMU_OPTION_tpmdev: @@ -3732,7 +3703,7 @@ void qemu_init(int argc, char **argv, char **envp) machine_class = MACHINE_GET_CLASS(current_machine); if (!qtest_enabled() && machine_class->deprecation_reason) { - error_report("Machine type '%s' is deprecated: %s", + warn_report("Machine type '%s' is deprecated: %s", machine_class->name, machine_class->deprecation_reason); } @@ -3753,6 +3724,7 @@ void qemu_init(int argc, char **argv, char **envp) qemu_resolve_machine_memdev(); parse_numa_opts(current_machine); + cxl_set_opts(); if (vmstate_dump_file) { /* dump and exit */ diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index d0041c864b..b4cc3c2d68 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -99,7 +99,7 @@ static inline bool vu_has_protocol_feature(VuDev *dev, unsigned int fbit) return has_feature(dev->protocol_features, fbit); } -static const char * +const char * vu_request_to_string(unsigned int req) { #define REQ(req) [req] = #req diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index cde9f07bb3..aea7ec5061 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -473,6 +473,15 @@ bool vu_init(VuDev *dev, */ void vu_deinit(VuDev *dev); + +/** + * vu_request_to_string: return string for vhost message request + * @req: VhostUserMsg request + * + * Returns a const string, do not free. + */ +const char *vu_request_to_string(unsigned int req); + /** * vu_dispatch: * @dev: a VuDev context diff --git a/target/alpha/cpu-param.h b/target/alpha/cpu-param.h index 1153992e42..17cd14e590 100644 --- a/target/alpha/cpu-param.h +++ b/target/alpha/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef ALPHA_CPU_PARAM_H -#define ALPHA_CPU_PARAM_H 1 +#define ALPHA_CPU_PARAM_H #define TARGET_LONG_BITS 64 #define TARGET_PAGE_BITS 13 diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index b59d505761..68ffb12427 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef ARM_CPU_PARAM_H -#define ARM_CPU_PARAM_H 1 +#define ARM_CPU_PARAM_H #ifdef TARGET_AARCH64 # define TARGET_LONG_BITS 64 diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h index 09010ad2da..78a84c1414 100644 --- a/target/arm/translate-a32.h +++ b/target/arm/translate-a32.h @@ -17,8 +17,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef TARGET_ARM_TRANSLATE_A64_H -#define TARGET_ARM_TRANSLATE_A64_H +#ifndef TARGET_ARM_TRANSLATE_A32_H +#define TARGET_ARM_TRANSLATE_A32_H /* Prototypes for autogenerated disassembler functions */ bool disas_m_nocp(DisasContext *dc, uint32_t insn); diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h index fb43a2380e..1d63402042 100644 --- a/target/arm/vec_internal.h +++ b/target/arm/vec_internal.h @@ -17,8 +17,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef TARGET_ARM_VEC_INTERNALS_H -#define TARGET_ARM_VEC_INTERNALS_H +#ifndef TARGET_ARM_VEC_INTERNAL_H +#define TARGET_ARM_VEC_INTERNAL_H /* * Note that vector data is stored in host-endian 64-bit chunks, @@ -217,4 +217,4 @@ uint64_t pmull_h(uint64_t op1, uint64_t op2); */ uint64_t pmull_w(uint64_t op1, uint64_t op2); -#endif /* TARGET_ARM_VEC_INTERNALS_H */ +#endif /* TARGET_ARM_VEC_INTERNAL_H */ diff --git a/target/avr/cpu-qom.h b/target/avr/cpu-qom.h index 32a1c762e6..b5c3507d6d 100644 --- a/target/avr/cpu-qom.h +++ b/target/avr/cpu-qom.h @@ -18,8 +18,8 @@ * <http://www.gnu.org/licenses/lgpl-2.1.html> */ -#ifndef QEMU_AVR_QOM_H -#define QEMU_AVR_QOM_H +#ifndef TARGET_AVR_CPU_QOM_H +#define TARGET_AVR_CPU_QOM_H #include "hw/core/cpu.h" #include "qom/object.h" @@ -44,4 +44,4 @@ struct AVRCPUClass { }; -#endif /* !defined (QEMU_AVR_CPU_QOM_H) */ +#endif /* TARGET_AVR_CPU_QOM_H */ diff --git a/target/avr/cpu.h b/target/avr/cpu.h index 55497f851d..d304f33301 100644 --- a/target/avr/cpu.h +++ b/target/avr/cpu.h @@ -247,4 +247,4 @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size, #include "exec/cpu-all.h" -#endif /* !defined (QEMU_AVR_CPU_H) */ +#endif /* QEMU_AVR_CPU_H */ diff --git a/target/cris/cpu-param.h b/target/cris/cpu-param.h index 36a3058761..12ec22d8df 100644 --- a/target/cris/cpu-param.h +++ b/target/cris/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef CRIS_CPU_PARAM_H -#define CRIS_CPU_PARAM_H 1 +#define CRIS_CPU_PARAM_H #define TARGET_LONG_BITS 32 #define TARGET_PAGE_BITS 13 diff --git a/target/hexagon/attribs.h b/target/hexagon/attribs.h index 54576f4143..d51bb4f732 100644 --- a/target/hexagon/attribs.h +++ b/target/hexagon/attribs.h @@ -32,4 +32,4 @@ extern DECLARE_BITMAP(opcode_attribs[XX_LAST_OPCODE], A_ZZ_LASTATTRIB); #define GET_ATTRIB(opcode, attrib) \ test_bit(attrib, opcode_attribs[opcode]) -#endif /* ATTRIBS_H */ +#endif /* HEXAGON_ATTRIBS_H */ diff --git a/target/hexagon/hex_arch_types.h b/target/hexagon/hex_arch_types.h index 78ad607f53..885f68f760 100644 --- a/target/hexagon/hex_arch_types.h +++ b/target/hexagon/hex_arch_types.h @@ -15,8 +15,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef HEXAGON_ARCH_TYPES_H -#define HEXAGON_ARCH_TYPES_H +#ifndef HEXAGON_HEX_ARCH_TYPES_H +#define HEXAGON_HEX_ARCH_TYPES_H #include "qemu/osdep.h" #include "mmvec/mmvec.h" diff --git a/target/hexagon/hex_regs.h b/target/hexagon/hex_regs.h index e1b3149b07..a63c2c0fd5 100644 --- a/target/hexagon/hex_regs.h +++ b/target/hexagon/hex_regs.h @@ -15,8 +15,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef HEXAGON_REGS_H -#define HEXAGON_REGS_H +#ifndef HEXAGON_HEX_REGS_H +#define HEXAGON_HEX_REGS_H enum { HEX_REG_R00 = 0, diff --git a/target/hppa/cpu-param.h b/target/hppa/cpu-param.h index a97d1428df..a48a2701ae 100644 --- a/target/hppa/cpu-param.h +++ b/target/hppa/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef HPPA_CPU_PARAM_H -#define HPPA_CPU_PARAM_H 1 +#define HPPA_CPU_PARAM_H #ifdef TARGET_HPPA64 # define TARGET_LONG_BITS 64 diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h index 57abc64c0d..9740bd7abd 100644 --- a/target/i386/cpu-param.h +++ b/target/i386/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef I386_CPU_PARAM_H -#define I386_CPU_PARAM_H 1 +#define I386_CPU_PARAM_H #ifdef TARGET_X86_64 # define TARGET_LONG_BITS 64 diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 62c240fa91..35c3475e6c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -855,7 +855,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "fsrm", NULL, NULL, NULL, "avx512-vp2intersect", NULL, "md-clear", NULL, NULL, NULL, "serialize", NULL, - "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", NULL, NULL, "amx-bf16", "avx512-fp16", "amx-tile", "amx-int8", "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", @@ -937,6 +937,34 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_XSAVE_FEATURES, }, + [FEAT_XSAVE_XSS_LO] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, + .ecx = 1, + .reg = R_ECX, + }, + }, + [FEAT_XSAVE_XSS_HI] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, + .ecx = 1, + .reg = R_EDX + }, + }, [FEAT_6_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -952,7 +980,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid = { .eax = 6, .reg = R_EAX, }, .tcg_features = TCG_6_EAX_FEATURES, }, - [FEAT_XSAVE_COMP_LO] = { + [FEAT_XSAVE_XCR0_LO] = { .type = CPUID_FEATURE_WORD, .cpuid = { .eax = 0xD, @@ -965,7 +993,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK, }, - [FEAT_XSAVE_COMP_HI] = { + [FEAT_XSAVE_XCR0_HI] = { .type = CPUID_FEATURE_WORD, .cpuid = { .eax = 0xD, @@ -1382,6 +1410,9 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) + ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ @@ -1414,6 +1445,10 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_PKRU_BIT] = { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, .size = sizeof(XSavePKRU) }, + [XSTATE_ARCH_LBR_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_ARCH_LBR, + .offset = 0 /*supervisor mode component, offset = 0 */, + .size = sizeof(XSavesArchLBR) }, [XSTATE_XTILE_CFG_BIT] = { .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, .size = sizeof(XSaveXTILECFG), @@ -1424,15 +1459,18 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { }, }; -static uint32_t xsave_area_size(uint64_t mask) +static uint32_t xsave_area_size(uint64_t mask, bool compacted) { + uint64_t ret = x86_ext_save_areas[0].size; + const ExtSaveArea *esa; + uint32_t offset = 0; int i; - uint64_t ret = 0; - for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { - const ExtSaveArea *esa = &x86_ext_save_areas[i]; + for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { + esa = &x86_ext_save_areas[i]; if ((mask >> i) & 1) { - ret = MAX(ret, esa->offset + esa->size); + offset = compacted ? ret : esa->offset; + ret = MAX(ret, offset + esa->size); } } return ret; @@ -1443,10 +1481,10 @@ static inline bool accel_uses_host_cpuid(void) return kvm_enabled() || hvf_enabled(); } -static inline uint64_t x86_cpu_xsave_components(X86CPU *cpu) +static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) { - return ((uint64_t)cpu->env.features[FEAT_XSAVE_COMP_HI]) << 32 | - cpu->env.features[FEAT_XSAVE_COMP_LO]; + return ((uint64_t)cpu->env.features[FEAT_XSAVE_XCR0_HI]) << 32 | + cpu->env.features[FEAT_XSAVE_XCR0_LO]; } /* Return name of 32-bit register, from a R_* constant */ @@ -1458,6 +1496,12 @@ static const char *get_register_name_32(unsigned int reg) return x86_reg_info_32[reg].name; } +static inline uint64_t x86_cpu_xsave_xss_components(X86CPU *cpu) +{ + return ((uint64_t)cpu->env.features[FEAT_XSAVE_XSS_HI]) << 32 | + cpu->env.features[FEAT_XSAVE_XSS_LO]; +} + /* * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. @@ -3259,128 +3303,6 @@ static const X86CPUDefinition builtin_x86_defs[] = { } }, { - .name = "Icelake-Client", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 126, - .stepping = 0, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, - .features[FEAT_8000_0008_EBX] = - CPUID_8000_0008_EBX_WBNOINVD, - .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP, - .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | - CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | - CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | - CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | - CPUID_7_0_ECX_AVX512_VPOPCNTDQ, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, - /* XSAVES is added in version 3 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ - .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | - MSR_VMX_BASIC_TRUE_CTLS, - .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | - VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | - VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, - .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | - MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | - MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | - MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | - MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | - MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | - MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, - .features[FEAT_VMX_EXIT_CTLS] = - VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | - VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | - VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | - VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | - VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, - .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | - MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, - .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | - VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | - VMX_PIN_BASED_VMX_PREEMPTION_TIMER, - .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | - VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | - VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | - VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | - VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | - VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | - VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | - VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | - VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | - VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | - VMX_CPU_BASED_MONITOR_TRAP_FLAG | - VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, - .features[FEAT_VMX_SECONDARY_CTLS] = - VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | - VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | - VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | - VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | - VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | - VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, - .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Icelake)", - .versions = (X86CPUVersionDefinition[]) { - { - .version = 1, - .note = "deprecated" - }, - { - .version = 2, - .note = "no TSX, deprecated", - .alias = "Icelake-Client-noTSX", - .props = (PropValue[]) { - { "hle", "off" }, - { "rtm", "off" }, - { /* end of list */ } - }, - }, - { - .version = 3, - .note = "no TSX, XSAVES, deprecated", - .props = (PropValue[]) { - { "xsaves", "on" }, - { "vmx-xsaves", "on" }, - { /* end of list */ } - }, - }, - { /* end of list */ } - }, - .deprecation_note = "use Icelake-Server instead" - }, - { .name = "Icelake-Server", .level = 0xd, .vendor = CPUID_VENDOR_INTEL, @@ -4633,8 +4555,8 @@ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) /* XSAVE components are automatically enabled by other features, * so return the original feature name instead */ - if (w == FEAT_XSAVE_COMP_LO || w == FEAT_XSAVE_COMP_HI) { - int comp = (w == FEAT_XSAVE_COMP_HI) ? bitnr + 32 : bitnr; + if (w == FEAT_XSAVE_XCR0_LO || w == FEAT_XSAVE_XCR0_HI) { + int comp = (w == FEAT_XSAVE_XCR0_HI) ? bitnr + 32 : bitnr; if (comp < ARRAY_SIZE(x86_ext_save_areas) && x86_ext_save_areas[comp].bits) { @@ -5022,6 +4944,59 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, return r; } +static void x86_cpu_get_supported_cpuid(uint32_t func, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + if (kvm_enabled()) { + *eax = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_EAX); + *ebx = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_EBX); + *ecx = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_ECX); + *edx = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_EDX); + } else if (hvf_enabled()) { + *eax = hvf_get_supported_cpuid(func, index, R_EAX); + *ebx = hvf_get_supported_cpuid(func, index, R_EBX); + *ecx = hvf_get_supported_cpuid(func, index, R_ECX); + *edx = hvf_get_supported_cpuid(func, index, R_EDX); + } else { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + } +} + +static void x86_cpu_get_cache_cpuid(uint32_t func, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + uint32_t level, unused; + + /* Only return valid host leaves. */ + switch (func) { + case 2: + case 4: + host_cpuid(0, 0, &level, &unused, &unused, &unused); + break; + case 0x80000005: + case 0x80000006: + case 0x8000001d: + host_cpuid(0x80000000, 0, &level, &unused, &unused, &unused); + break; + default: + return; + } + + if (func > level) { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + } else { + host_cpuid(func, index, eax, ebx, ecx, edx); + } +} + /* * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. */ @@ -5280,7 +5255,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 2: /* cache info: needed for Pentium Pro compatibility */ if (cpu->cache_info_passthrough) { - host_cpuid(index, 0, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { *eax = *ebx = *ecx = *edx = 0; @@ -5300,7 +5275,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 4: /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { - host_cpuid(index, count, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); /* QEMU gives out its own APIC IDs, never pass down bits 31..26. */ *eax &= ~0xFC000000; if ((*eax & 31) && cs->nr_cores > 1) { @@ -5406,18 +5381,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0xA: /* Architectural Performance Monitoring Leaf */ - if (kvm_enabled() && cpu->enable_pmu) { - KVMState *s = cs->kvm_state; - - *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX); - *ebx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EBX); - *ecx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_ECX); - *edx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EDX); - } else if (hvf_enabled() && cpu->enable_pmu) { - *eax = hvf_get_supported_cpuid(0xA, count, R_EAX); - *ebx = hvf_get_supported_cpuid(0xA, count, R_EBX); - *ecx = hvf_get_supported_cpuid(0xA, count, R_ECX); - *edx = hvf_get_supported_cpuid(0xA, count, R_EDX); + if (accel_uses_host_cpuid() && cpu->enable_pmu) { + x86_cpu_get_supported_cpuid(0xA, count, eax, ebx, ecx, edx); } else { *eax = 0; *ebx = 0; @@ -5455,6 +5420,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, assert(!(*eax & ~0x1f)); *ebx &= 0xffff; /* The count doesn't need to be reliable. */ break; + case 0x1C: + if (accel_uses_host_cpuid() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); + *edx = 0; + } + break; case 0x1F: /* V2 Extended Topology Enumeration Leaf */ if (env->nr_dies < 2) { @@ -5499,25 +5471,47 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } if (count == 0) { - *ecx = xsave_area_size(x86_cpu_xsave_components(cpu)); - *eax = env->features[FEAT_XSAVE_COMP_LO]; - *edx = env->features[FEAT_XSAVE_COMP_HI]; + *ecx = xsave_area_size(x86_cpu_xsave_xcr0_components(cpu), false); + *eax = env->features[FEAT_XSAVE_XCR0_LO]; + *edx = env->features[FEAT_XSAVE_XCR0_HI]; /* * The initial value of xcr0 and ebx == 0, On host without kvm * commit 412a3c41(e.g., CentOS 6), the ebx's value always == 0 * even through guest update xcr0, this will crash some legacy guest * (e.g., CentOS 6), So set ebx == ecx to workaroud it. */ - *ebx = kvm_enabled() ? *ecx : xsave_area_size(env->xcr0); + *ebx = kvm_enabled() ? *ecx : xsave_area_size(env->xcr0, false); } else if (count == 1) { + uint64_t xstate = x86_cpu_xsave_xcr0_components(cpu) | + x86_cpu_xsave_xss_components(cpu); + *eax = env->features[FEAT_XSAVE]; + *ebx = xsave_area_size(xstate, true); + *ecx = env->features[FEAT_XSAVE_XSS_LO]; + *edx = env->features[FEAT_XSAVE_XSS_HI]; + if (kvm_enabled() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR) && + (*eax & CPUID_XSAVE_XSAVES)) { + *ecx |= XSTATE_ARCH_LBR_MASK; + } else { + *ecx &= ~XSTATE_ARCH_LBR_MASK; + } + } else if (count == 0xf && + accel_uses_host_cpuid() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0xD, count, eax, ebx, ecx, edx); } else if (count < ARRAY_SIZE(x86_ext_save_areas)) { - if ((x86_cpu_xsave_components(cpu) >> count) & 1) { - const ExtSaveArea *esa = &x86_ext_save_areas[count]; + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + + if (x86_cpu_xsave_xcr0_components(cpu) & (1ULL << count)) { *eax = esa->size; *ebx = esa->offset; *ecx = esa->ecx & (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); + } else if (x86_cpu_xsave_xss_components(cpu) & (1ULL << count)) { + *eax = esa->size; + *ebx = 0; + *ecx = 1; } } break; @@ -5557,10 +5551,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, * supports. Features can be further restricted by userspace, but not * made more permissive. */ - *eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x12, count, R_EAX); - *ebx = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x12, count, R_EBX); - *ecx = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x12, count, R_ECX); - *edx = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x12, count, R_EDX); + x86_cpu_get_supported_cpuid(0x12, index, eax, ebx, ecx, edx); if (count == 0) { *eax &= env->features[FEAT_SGX_12_0_EAX]; @@ -5568,8 +5559,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } else { *eax &= env->features[FEAT_SGX_12_1_EAX]; *ebx &= 0; /* ebx reserve */ - *ecx &= env->features[FEAT_XSAVE_COMP_LO]; - *edx &= env->features[FEAT_XSAVE_COMP_HI]; + *ecx &= env->features[FEAT_XSAVE_XSS_LO]; + *edx &= env->features[FEAT_XSAVE_XSS_HI]; /* FP and SSE are always allowed regardless of XSAVE/XCR0. */ *ecx |= XSTATE_FP_MASK | XSTATE_SSE_MASK; @@ -5702,7 +5693,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 0x80000005: /* cache info (L1 cache) */ if (cpu->cache_info_passthrough) { - host_cpuid(index, 0, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | @@ -5715,7 +5706,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 0x80000006: /* cache info (L2 cache) */ if (cpu->cache_info_passthrough) { - host_cpuid(index, 0, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | @@ -5775,7 +5766,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 0x8000001D: *eax = 0; if (cpu->cache_info_passthrough) { - host_cpuid(index, count, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); break; } switch (count) { @@ -5965,6 +5956,9 @@ static void x86_cpu_reset(DeviceState *dev) } for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { const ExtSaveArea *esa = &x86_ext_save_areas[i]; + if (!((1 << i) & CPUID_XSTATE_XCR0_MASK)) { + continue; + } if (env->features[esa->feature] & esa->bits) { xcr0 |= 1ull << i; } @@ -6079,8 +6073,8 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) static bool request_perm; if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { - env->features[FEAT_XSAVE_COMP_LO] = 0; - env->features[FEAT_XSAVE_COMP_HI] = 0; + env->features[FEAT_XSAVE_XCR0_LO] = 0; + env->features[FEAT_XSAVE_XCR0_HI] = 0; return; } @@ -6098,8 +6092,10 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) request_perm = true; } - env->features[FEAT_XSAVE_COMP_LO] = mask; - env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; + env->features[FEAT_XSAVE_XCR0_LO] = mask & CPUID_XSTATE_XCR0_MASK; + env->features[FEAT_XSAVE_XCR0_HI] = mask >> 32; + env->features[FEAT_XSAVE_XSS_LO] = mask & CPUID_XSTATE_XSS_MASK; + env->features[FEAT_XSAVE_XSS_HI] = mask >> 32; } /***** Steps involved on loading and filtering CPUID data @@ -6366,6 +6362,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) CPUX86State *env = &cpu->env; Error *local_err = NULL; static bool ht_warned; + unsigned requested_lbr_fmt; if (cpu->apic_id == UNASSIGNED_APIC_ID) { error_setg(errp, "apic-id property was not initialized properly"); @@ -6383,6 +6380,42 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) goto out; } + /* + * Override env->features[FEAT_PERF_CAPABILITIES].LBR_FMT + * with user-provided setting. + */ + if (cpu->lbr_fmt != ~PERF_CAP_LBR_FMT) { + if ((cpu->lbr_fmt & PERF_CAP_LBR_FMT) != cpu->lbr_fmt) { + error_setg(errp, "invalid lbr-fmt"); + return; + } + env->features[FEAT_PERF_CAPABILITIES] &= ~PERF_CAP_LBR_FMT; + env->features[FEAT_PERF_CAPABILITIES] |= cpu->lbr_fmt; + } + + /* + * vPMU LBR is supported when 1) KVM is enabled 2) Option pmu=on and + * 3)vPMU LBR format matches that of host setting. + */ + requested_lbr_fmt = + env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_LBR_FMT; + if (requested_lbr_fmt && kvm_enabled()) { + uint64_t host_perf_cap = + x86_cpu_get_supported_feature_word(FEAT_PERF_CAPABILITIES, false); + unsigned host_lbr_fmt = host_perf_cap & PERF_CAP_LBR_FMT; + + if (!cpu->enable_pmu) { + error_setg(errp, "vPMU: LBR is unsupported without pmu=on"); + return; + } + if (requested_lbr_fmt != host_lbr_fmt) { + error_setg(errp, "vPMU: the lbr-fmt value (0x%x) does not match " + "the host value (0x%x).", + requested_lbr_fmt, host_lbr_fmt); + return; + } + } + x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { @@ -6735,6 +6768,8 @@ static void x86_cpu_initfn(Object *obj) object_property_add_alias(obj, "sse4_2", obj, "sse4.2"); object_property_add_alias(obj, "hv-apicv", obj, "hv-avic"); + cpu->lbr_fmt = ~PERF_CAP_LBR_FMT; + object_property_add_alias(obj, "lbr_fmt", obj, "lbr-fmt"); if (xcc->model) { x86_cpu_load_model(cpu, xcc->model); @@ -6889,6 +6924,7 @@ static Property x86_cpu_properties[] = { #endif DEFINE_PROP_INT32("node-id", X86CPU, node_id, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false), + DEFINE_PROP_UINT64_CHECKMASK("lbr-fmt", X86CPU, lbr_fmt, PERF_CAP_LBR_FMT), DEFINE_PROP_UINT32("hv-spinlocks", X86CPU, hyperv_spinlock_attempts, HYPERV_SPINLOCK_NEVER_NOTIFY), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 9661f9fbd1..0d528ac58f 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -386,10 +386,16 @@ typedef enum X86Seg { #define ARCH_CAP_TSX_CTRL_MSR (1<<7) #define MSR_IA32_PERF_CAPABILITIES 0x345 +#define PERF_CAP_LBR_FMT 0x3f #define MSR_IA32_TSX_CTRL 0x122 #define MSR_IA32_TSCDEADLINE 0x6e0 #define MSR_IA32_PKRS 0x6e1 +#define MSR_ARCH_LBR_CTL 0x000014ce +#define MSR_ARCH_LBR_DEPTH 0x000014cf +#define MSR_ARCH_LBR_FROM_0 0x00001500 +#define MSR_ARCH_LBR_TO_0 0x00001600 +#define MSR_ARCH_LBR_INFO_0 0x00001200 #define FEATURE_CONTROL_LOCKED (1<<0) #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1ULL << 1) @@ -543,6 +549,7 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 #define XSTATE_PKRU_BIT 9 +#define XSTATE_ARCH_LBR_BIT 15 #define XSTATE_XTILE_CFG_BIT 17 #define XSTATE_XTILE_DATA_BIT 18 @@ -555,6 +562,7 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) +#define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) #define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) #define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) @@ -567,6 +575,14 @@ typedef enum X86Seg { #define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) +/* CPUID feature bits available in XCR0 */ +#define CPUID_XSTATE_XCR0_MASK (XSTATE_FP_MASK | XSTATE_SSE_MASK | \ + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | \ + XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | \ + XSTATE_ZMM_Hi256_MASK | \ + XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ + XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -585,8 +601,8 @@ typedef enum FeatureWord { FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ - FEAT_XSAVE_COMP_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ - FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ + FEAT_XSAVE_XCR0_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ + FEAT_XSAVE_XCR0_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ FEAT_ARCH_CAPABILITIES, FEAT_CORE_CAPABILITY, FEAT_PERF_CAPABILITIES, @@ -603,6 +619,8 @@ typedef enum FeatureWord { FEAT_SGX_12_0_EAX, /* CPUID[EAX=0x12,ECX=0].EAX (SGX) */ FEAT_SGX_12_0_EBX, /* CPUID[EAX=0x12,ECX=0].EBX (SGX MISCSELECT[31:0]) */ FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ + FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ + FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ FEATURE_WORDS, } FeatureWord; @@ -859,6 +877,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) +/* Architectural LBRs */ +#define CPUID_7_0_EDX_ARCH_LBR (1U << 19) /* AVX512_FP16 instruction */ #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) /* AMX tile (two-dimensional register) */ @@ -1365,6 +1385,24 @@ typedef struct XSaveXTILEDATA { uint8_t xtiledata[8][1024]; } XSaveXTILEDATA; +typedef struct { + uint64_t from; + uint64_t to; + uint64_t info; +} LBREntry; + +#define ARCH_LBR_NR_ENTRIES 32 + +/* Ext. save area 19: Supervisor mode Arch LBR state */ +typedef struct XSavesArchLBR { + uint64_t lbr_ctl; + uint64_t lbr_depth; + uint64_t ler_from; + uint64_t ler_to; + uint64_t ler_info; + LBREntry lbr_records[ARCH_LBR_NR_ENTRIES]; +} XSavesArchLBR; + QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40); @@ -1374,6 +1412,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400); QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40); QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000); +QEMU_BUILD_BUG_ON(sizeof(XSavesArchLBR) != 0x328); typedef struct ExtSaveArea { uint32_t feature, bits; @@ -1616,6 +1655,11 @@ typedef struct CPUArchState { uint64_t msr_xfd; uint64_t msr_xfd_err; + /* Per-VCPU Arch LBR MSRs */ + uint64_t msr_lbr_ctl; + uint64_t msr_lbr_depth; + LBREntry lbr_records[ARCH_LBR_NR_ENTRIES]; + /* exception/interrupt handling */ int error_code; int exception_is_int; @@ -1810,6 +1854,15 @@ struct ArchCPU { */ bool enable_pmu; + /* + * Enable LBR_FMT bits of IA32_PERF_CAPABILITIES MSR. + * This can't be initialized with a default because it doesn't have + * stable ABI support yet. It is only allowed to pass all LBR_FMT bits + * returned by kvm_arch_get_supported_msr_feature()(which depends on both + * host CPU and kernel capabilities) to the guest. + */ + uint64_t lbr_fmt; + /* LMCE support can be enabled/disabled via cpu option 'lmce=on/off'. It is * disabled by default to avoid breaking migration between QEMU with * different LMCE configurations. diff --git a/target/i386/hax/hax-accel-ops.h b/target/i386/hax/hax-accel-ops.h index c7698519cd..9e357e7b40 100644 --- a/target/i386/hax/hax-accel-ops.h +++ b/target/i386/hax/hax-accel-ops.h @@ -7,8 +7,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef HAX_CPUS_H -#define HAX_CPUS_H +#ifndef TARGET_I386_HAX_ACCEL_OPS_H +#define TARGET_I386_HAX_ACCEL_OPS_H #include "sysemu/cpus.h" @@ -28,4 +28,4 @@ int hax_vcpu_destroy(CPUState *cpu); void hax_raise_event(CPUState *cpu); void hax_reset_vcpu_state(void *opaque); -#endif /* HAX_CPUS_H */ +#endif /* TARGET_I386_HAX_ACCEL_OPS_H */ diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 5eb955ce9a..7237378a7d 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -171,7 +171,7 @@ static void kvm_cpu_instance_init(CPUState *cs) /* only applies to builtin_x86_defs cpus */ if (!kvm_irqchip_in_kernel()) { x86_cpu_change_kvm_default("x2apic", "off"); - } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) { + } else if (kvm_irqchip_is_split()) { x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on"); } diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c885763a5b..a9ee8eebd7 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -141,6 +141,7 @@ static struct kvm_msr_list *kvm_feature_msrs; #define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */ static RateLimit bus_lock_ratelimit_ctrl; +static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); int kvm_has_pit_state2(void) { @@ -211,28 +212,21 @@ static int kvm_get_tsc(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data = {}; + uint64_t value; int ret; if (env->tsc_valid) { return 0; } - memset(&msr_data, 0, sizeof(msr_data)); - msr_data.info.nmsrs = 1; - msr_data.entries[0].index = MSR_IA32_TSC; env->tsc_valid = !runstate_is_running(); - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + ret = kvm_get_one_msr(cpu, MSR_IA32_TSC, &value); if (ret < 0) { return ret; } - assert(ret == 1); - env->tsc = msr_data.entries[0].data; + env->tsc = value; return 0; } @@ -1566,21 +1560,14 @@ static int hyperv_init_vcpu(X86CPU *cpu) * the kernel doesn't support setting vp_index; assert that its value * is in sync */ - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data = { - .info.nmsrs = 1, - .entries[0].index = HV_X64_MSR_VP_INDEX, - }; + uint64_t value; - ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data); + ret = kvm_get_one_msr(cpu, HV_X64_MSR_VP_INDEX, &value); if (ret < 0) { return ret; } - assert(ret == 1); - if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) { + if (value != hyperv_vp_index(CPU(cpu))) { error_report("kernel's vp_index != QEMU's vp_index"); return -ENXIO; } @@ -2839,6 +2826,25 @@ static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); } +static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value) +{ + int ret; + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[1]; + } msr_data = { + .info.nmsrs = 1, + .entries[0].index = index, + }; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + assert(ret == 1); + *value = msr_data.entries[0].data; + return ret; +} void kvm_put_apicbase(X86CPU *cpu, uint64_t value) { int ret; @@ -3361,6 +3367,38 @@ static int kvm_put_msrs(X86CPU *cpu, int level) env->msr_xfd_err); } + if (kvm_enabled() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + uint64_t depth; + int i, ret; + + /* + * Only migrate Arch LBR states when: 1) Arch LBR is enabled + * for migrated vcpu. 2) the host Arch LBR depth equals that + * of source guest's, this is to avoid mismatch of guest/host + * config for the msr hence avoid unexpected misbehavior. + */ + ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth); + + if (ret == 1 && (env->msr_lbr_ctl & 0x1) && !!depth && + depth == env->msr_lbr_depth) { + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, env->msr_lbr_ctl); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, env->msr_lbr_depth); + + for (i = 0; i < ARCH_LBR_NR_ENTRIES; i++) { + if (!env->lbr_records[i].from) { + continue; + } + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_FROM_0 + i, + env->lbr_records[i].from); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_TO_0 + i, + env->lbr_records[i].to); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_INFO_0 + i, + env->lbr_records[i].info); + } + } + } + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */ } @@ -3761,6 +3799,26 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); } + if (kvm_enabled() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + uint64_t ctl, depth; + int i, ret2; + + ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_CTL, &ctl); + ret2 = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth); + if (ret == 1 && ret2 == 1 && (ctl & 0x1) && + depth == ARCH_LBR_NR_ENTRIES) { + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, 0); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, 0); + + for (i = 0; i < ARCH_LBR_NR_ENTRIES; i++) { + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_FROM_0 + i, 0); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_TO_0 + i, 0); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_INFO_0 + i, 0); + } + } + } + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); if (ret < 0) { return ret; @@ -4066,6 +4124,21 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_XFD_ERR: env->msr_xfd_err = msrs[i].data; break; + case MSR_ARCH_LBR_CTL: + env->msr_lbr_ctl = msrs[i].data; + break; + case MSR_ARCH_LBR_DEPTH: + env->msr_lbr_depth = msrs[i].data; + break; + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31: + env->lbr_records[index - MSR_ARCH_LBR_FROM_0].from = msrs[i].data; + break; + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31: + env->lbr_records[index - MSR_ARCH_LBR_TO_0].to = msrs[i].data; + break; + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31: + env->lbr_records[index - MSR_ARCH_LBR_INFO_0].info = msrs[i].data; + break; } } diff --git a/target/i386/machine.c b/target/i386/machine.c index 7c54bada81..cecd476e98 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -136,6 +136,22 @@ static const VMStateDescription vmstate_mtrr_var = { #define VMSTATE_MTRR_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar) +static const VMStateDescription vmstate_lbr_records_var = { + .name = "lbr_records_var", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(from, LBREntry), + VMSTATE_UINT64(to, LBREntry), + VMSTATE_UINT64(info, LBREntry), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_LBR_VARS(_field, _state, _n, _v) \ + VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_lbr_records_var, \ + LBREntry) + typedef struct x86_FPReg_tmp { FPReg *parent; uint64_t tmp_mant; @@ -1525,6 +1541,27 @@ static const VMStateDescription vmstate_amx_xtile = { }; #endif +static bool arch_lbr_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR); +} + +static const VMStateDescription vmstate_arch_lbr = { + .name = "cpu/arch_lbr", + .version_id = 1, + .minimum_version_id = 1, + .needed = arch_lbr_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.msr_lbr_ctl, X86CPU), + VMSTATE_UINT64(env.msr_lbr_depth, X86CPU), + VMSTATE_LBR_VARS(env.lbr_records, X86CPU, ARCH_LBR_NR_ENTRIES, 1), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1668,6 +1705,7 @@ const VMStateDescription vmstate_x86_cpu = { #ifdef TARGET_X86_64 &vmstate_amx_xtile, #endif + &vmstate_arch_lbr, NULL } }; diff --git a/target/i386/nvmm/nvmm-accel-ops.h b/target/i386/nvmm/nvmm-accel-ops.h index 43e24adcaf..7c5461bd75 100644 --- a/target/i386/nvmm/nvmm-accel-ops.h +++ b/target/i386/nvmm/nvmm-accel-ops.h @@ -7,8 +7,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef NVMM_CPUS_H -#define NVMM_CPUS_H +#ifndef TARGET_I386_NVMM_ACCEL_OPS_H +#define TARGET_I386_NVMM_ACCEL_OPS_H #include "sysemu/cpus.h" @@ -21,4 +21,4 @@ void nvmm_cpu_synchronize_post_reset(CPUState *cpu); void nvmm_cpu_synchronize_post_init(CPUState *cpu); void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu); -#endif /* NVMM_CPUS_H */ +#endif /* TARGET_I386_NVMM_ACCEL_OPS_H */ diff --git a/target/i386/sev.h b/target/i386/sev.h index 83e82aa42c..7b1528248a 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -11,8 +11,8 @@ * */ -#ifndef QEMU_SEV_I386_H -#define QEMU_SEV_I386_H +#ifndef I386_SEV_H +#define I386_SEV_H #ifndef CONFIG_USER_ONLY #include CONFIG_DEVICES /* CONFIG_SEV */ diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h index b5102dd1ee..7a1bb1ab57 100644 --- a/target/i386/whpx/whpx-accel-ops.h +++ b/target/i386/whpx/whpx-accel-ops.h @@ -7,8 +7,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef WHPX_CPUS_H -#define WHPX_CPUS_H +#ifndef TARGET_I386_WHPX_ACCEL_OPS_H +#define TARGET_I386_WHPX_ACCEL_OPS_H #include "sysemu/cpus.h" @@ -30,4 +30,4 @@ void whpx_cpu_synchronize_pre_resume(bool step_pending); /* full state set, modified during initialization or on vmload */ #define WHPX_SET_FULL_STATE 3 -#endif /* WHPX_CPUS_H */ +#endif /* TARGET_I386_WHPX_ACCEL_OPS_H */ diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index 23ae639b23..b22a3314b4 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -373,6 +373,8 @@ static int whpx_set_tsc(CPUState *cpu) * * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64 * and IA-32 Architectures Software Developer's Manual. + * + * The functions below translate the value of CR8 to TPR and vice versa. */ static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr) @@ -380,6 +382,11 @@ static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr) return tpr >> 4; } +static uint64_t whpx_cr8_to_apic_tpr(uint64_t cr8) +{ + return cr8 << 4; +} + static void whpx_set_registers(CPUState *cpu, int level) { struct whpx_state *whpx = &whpx_global; @@ -687,7 +694,7 @@ static void whpx_get_registers(CPUState *cpu) tpr = vcxt.values[idx++].Reg64; if (tpr != vcpu->tpr) { vcpu->tpr = tpr; - cpu_set_apic_tpr(x86_cpu->apic_state, tpr); + cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(tpr)); } /* 8 Debug Registers - Skipped */ @@ -1547,7 +1554,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu) } /* Sync the TPR to the CR8 if was modified during the intercept */ - tpr = cpu_get_apic_tpr(x86_cpu->apic_state); + tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state)); if (tpr != vcpu->tpr) { vcpu->tpr = tpr; reg_values[reg_count].Reg64 = tpr; @@ -1596,7 +1603,7 @@ static void whpx_vcpu_post_run(CPUState *cpu) if (vcpu->tpr != tpr) { vcpu->tpr = tpr; qemu_mutex_lock_iothread(); - cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr); + cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr)); qemu_mutex_unlock_iothread(); } diff --git a/target/i386/whpx/whpx-internal.h b/target/i386/whpx/whpx-internal.h index dbb7e7ba82..06429d8ccd 100644 --- a/target/i386/whpx/whpx-internal.h +++ b/target/i386/whpx/whpx-internal.h @@ -1,5 +1,5 @@ -#ifndef WHP_INTERNAL_H -#define WHP_INTERNAL_H +#ifndef TARGET_I386_WHPX_INTERNAL_H +#define TARGET_I386_WHPX_INTERNAL_H #include <windows.h> #include <WinHvPlatform.h> @@ -116,4 +116,4 @@ typedef enum WHPFunctionList { WINHV_PLATFORM_FNS_SUPPLEMENTAL } WHPFunctionList; -#endif /* WHP_INTERNAL_H */ +#endif /* TARGET_I386_WHPX_INTERNAL_H */ diff --git a/target/m68k/cpu-param.h b/target/m68k/cpu-param.h index 06556dfbf3..44a8d193f0 100644 --- a/target/m68k/cpu-param.h +++ b/target/m68k/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef M68K_CPU_PARAM_H -#define M68K_CPU_PARAM_H 1 +#define M68K_CPU_PARAM_H #define TARGET_LONG_BITS 32 /* diff --git a/target/microblaze/cpu-param.h b/target/microblaze/cpu-param.h index 4d8297fa94..5e54ea0108 100644 --- a/target/microblaze/cpu-param.h +++ b/target/microblaze/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef MICROBLAZE_CPU_PARAM_H -#define MICROBLAZE_CPU_PARAM_H 1 +#define MICROBLAZE_CPU_PARAM_H /* * While system mode can address up to 64 bits of address space, diff --git a/target/mips/cpu-param.h b/target/mips/cpu-param.h index 1aebd01df9..f4c76994ea 100644 --- a/target/mips/cpu-param.h +++ b/target/mips/cpu-param.h @@ -5,7 +5,7 @@ */ #ifndef MIPS_CPU_PARAM_H -#define MIPS_CPU_PARAM_H 1 +#define MIPS_CPU_PARAM_H #ifdef TARGET_MIPS64 # define TARGET_LONG_BITS 64 diff --git a/target/nios2/cpu-param.h b/target/nios2/cpu-param.h index 38bedbfd61..177d720864 100644 --- a/target/nios2/cpu-param.h +++ b/target/nios2/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef NIOS2_CPU_PARAM_H -#define NIOS2_CPU_PARAM_H 1 +#define NIOS2_CPU_PARAM_H #define TARGET_LONG_BITS 32 #define TARGET_PAGE_BITS 12 diff --git a/target/openrisc/cpu-param.h b/target/openrisc/cpu-param.h index 06ee64d171..73be699f36 100644 --- a/target/openrisc/cpu-param.h +++ b/target/openrisc/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef OPENRISC_CPU_PARAM_H -#define OPENRISC_CPU_PARAM_H 1 +#define OPENRISC_CPU_PARAM_H #define TARGET_LONG_BITS 32 #define TARGET_PAGE_BITS 13 diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index dfbafc5236..41d1b2a24a 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -21,6 +21,7 @@ #include "qapi/error.h" #include "qemu/qemu-print.h" #include "cpu.h" +#include "exec/exec-all.h" static void openrisc_cpu_set_pc(CPUState *cs, vaddr value) { @@ -30,6 +31,15 @@ static void openrisc_cpu_set_pc(CPUState *cs, vaddr value) cpu->env.dflag = 0; } +static void openrisc_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) +{ + OpenRISCCPU *cpu = OPENRISC_CPU(cs); + + cpu->env.pc = tb->pc; +} + + static bool openrisc_cpu_has_work(CPUState *cs) { return cs->interrupt_request & (CPU_INTERRUPT_HARD | @@ -186,6 +196,7 @@ static const struct SysemuCPUOps openrisc_sysemu_ops = { static const struct TCGCPUOps openrisc_tcg_ops = { .initialize = openrisc_translate_init, + .synchronize_from_tb = openrisc_cpu_synchronize_from_tb, #ifndef CONFIG_USER_ONLY .tlb_fill = openrisc_cpu_tlb_fill, diff --git a/target/ppc/cpu-param.h b/target/ppc/cpu-param.h index 37b458d33d..ea377b7d06 100644 --- a/target/ppc/cpu-param.h +++ b/target/ppc/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef PPC_CPU_PARAM_H -#define PPC_CPU_PARAM_H 1 +#define PPC_CPU_PARAM_H #ifdef TARGET_PPC64 # define TARGET_LONG_BITS 64 diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h index 256d90f523..9692dd765e 100644 --- a/target/ppc/power8-pmu.h +++ b/target/ppc/power8-pmu.h @@ -10,8 +10,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef POWER8_PMU -#define POWER8_PMU +#ifndef POWER8_PMU_H +#define POWER8_PMU_H #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) void cpu_ppc_pmu_init(CPUPPCState *env); diff --git a/target/riscv/cpu-param.h b/target/riscv/cpu-param.h index 80eb615f93..ebaf26d26d 100644 --- a/target/riscv/cpu-param.h +++ b/target/riscv/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef RISCV_CPU_PARAM_H -#define RISCV_CPU_PARAM_H 1 +#define RISCV_CPU_PARAM_H #if defined(TARGET_RISCV64) # define TARGET_LONG_BITS 64 diff --git a/target/riscv/sbi_ecall_interface.h b/target/riscv/sbi_ecall_interface.h index fb1a3fa8f2..77574ed4cb 100644 --- a/target/riscv/sbi_ecall_interface.h +++ b/target/riscv/sbi_ecall_interface.h @@ -7,8 +7,8 @@ * Anup Patel <anup.patel@wdc.com> */ -#ifndef __SBI_ECALL_INTERFACE_H__ -#define __SBI_ECALL_INTERFACE_H__ +#ifndef SBI_ECALL_INTERFACE_H +#define SBI_ECALL_INTERFACE_H /* clang-format off */ diff --git a/target/s390x/cpu-param.h b/target/s390x/cpu-param.h index 472db648d7..bf951a002e 100644 --- a/target/s390x/cpu-param.h +++ b/target/s390x/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef S390_CPU_PARAM_H -#define S390_CPU_PARAM_H 1 +#define S390_CPU_PARAM_H #define TARGET_LONG_BITS 64 #define TARGET_PAGE_BITS 12 diff --git a/target/sh4/cpu-param.h b/target/sh4/cpu-param.h index 81ace3503b..98a02509bb 100644 --- a/target/sh4/cpu-param.h +++ b/target/sh4/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef SH4_CPU_PARAM_H -#define SH4_CPU_PARAM_H 1 +#define SH4_CPU_PARAM_H #define TARGET_LONG_BITS 32 #define TARGET_PAGE_BITS 12 /* 4k */ diff --git a/target/sparc/cpu-param.h b/target/sparc/cpu-param.h index 4746d89411..72ddc4a34f 100644 --- a/target/sparc/cpu-param.h +++ b/target/sparc/cpu-param.h @@ -5,7 +5,7 @@ */ #ifndef SPARC_CPU_PARAM_H -#define SPARC_CPU_PARAM_H 1 +#define SPARC_CPU_PARAM_H #ifdef TARGET_SPARC64 # define TARGET_LONG_BITS 64 diff --git a/target/tricore/cpu-param.h b/target/tricore/cpu-param.h index cf5d9af89d..2727913047 100644 --- a/target/tricore/cpu-param.h +++ b/target/tricore/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef TRICORE_CPU_PARAM_H -#define TRICORE_CPU_PARAM_H 1 +#define TRICORE_CPU_PARAM_H #define TARGET_LONG_BITS 32 #define TARGET_PAGE_BITS 14 diff --git a/target/xtensa/core-de233_fpu/core-isa.h b/target/xtensa/core-de233_fpu/core-isa.h index f125619e8d..40543b2c5e 100644 --- a/target/xtensa/core-de233_fpu/core-isa.h +++ b/target/xtensa/core-de233_fpu/core-isa.h @@ -28,8 +28,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef XTENSA_CORE_CONFIGURATION_H_ -#define XTENSA_CORE_CONFIGURATION_H_ +#ifndef XTENSA_CORE_DE233_FPU_CORE_ISA_H +#define XTENSA_CORE_DE233_FPU_CORE_ISA_H //depot/dev/Homewood/Xtensa/SWConfig/hal/core-common.h.tph#24 - edit change 444323 (text+ko) @@ -723,5 +723,4 @@ #endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ -#endif /* XTENSA_CORE_CONFIGURATION_H_ */ - +#endif /* XTENSA_CORE_DE233_FPU_CORE_ISA_H */ diff --git a/target/xtensa/core-de233_fpu/core-matmap.h b/target/xtensa/core-de233_fpu/core-matmap.h index cca51c7af1..e99e7d3123 100644 --- a/target/xtensa/core-de233_fpu/core-matmap.h +++ b/target/xtensa/core-de233_fpu/core-matmap.h @@ -43,11 +43,9 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #ifndef XTENSA_CONFIG_CORE_MATMAP_H #define XTENSA_CONFIG_CORE_MATMAP_H - /*---------------------------------------------------------------------- CACHE (MEMORY ACCESS) ATTRIBUTES ----------------------------------------------------------------------*/ @@ -713,5 +711,5 @@ -#endif /*XTENSA_CONFIG_CORE_MATMAP_H*/ +#endif /* XTENSA_CONFIG_CORE_MATMAP_H */ diff --git a/target/xtensa/core-dsp3400/core-isa.h b/target/xtensa/core-dsp3400/core-isa.h index 336b2467c6..1499ef2914 100644 --- a/target/xtensa/core-dsp3400/core-isa.h +++ b/target/xtensa/core-dsp3400/core-isa.h @@ -28,9 +28,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _XTENSA_CORE_CONFIGURATION_H -#define _XTENSA_CORE_CONFIGURATION_H - +#ifndef XTENSA_CORE_DSP3400_CORE_ISA_H +#define XTENSA_CORE_DSP3400_CORE_ISA_H /**************************************************************************** Parameters Useful for Any Code, USER or PRIVILEGED @@ -448,5 +447,4 @@ #endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ -#endif /* _XTENSA_CORE_CONFIGURATION_H */ - +#endif /* XTENSA_CORE_DSP3400_CORE_ISA_H */ diff --git a/target/xtensa/core-dsp3400/core-matmap.h b/target/xtensa/core-dsp3400/core-matmap.h index 8d1aa8336e..692012f9f4 100644 --- a/target/xtensa/core-dsp3400/core-matmap.h +++ b/target/xtensa/core-dsp3400/core-matmap.h @@ -43,11 +43,9 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #ifndef XTENSA_CONFIG_CORE_MATMAP_H #define XTENSA_CONFIG_CORE_MATMAP_H - /*---------------------------------------------------------------------- CACHE (MEMORY ACCESS) ATTRIBUTES ----------------------------------------------------------------------*/ @@ -308,5 +306,5 @@ -#endif /*XTENSA_CONFIG_CORE_MATMAP_H*/ +#endif /* XTENSA_CONFIG_CORE_MATMAP_H */ diff --git a/target/xtensa/cpu-param.h b/target/xtensa/cpu-param.h index 4fde21b941..b53e9a3e08 100644 --- a/target/xtensa/cpu-param.h +++ b/target/xtensa/cpu-param.h @@ -6,7 +6,7 @@ */ #ifndef XTENSA_CPU_PARAM_H -#define XTENSA_CPU_PARAM_H 1 +#define XTENSA_CPU_PARAM_H #define TARGET_LONG_BITS 32 #define TARGET_PAGE_BITS 12 diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index 92c91dcde9..cc82088d52 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -23,7 +23,7 @@ */ #ifndef TCG_INTERNAL_H -#define TCG_INTERNAL_H 1 +#define TCG_INTERNAL_H #define TCG_HIGHWATER 1024 diff --git a/tests/data/acpi/q35/CEDT.cxl b/tests/data/acpi/q35/CEDT.cxl Binary files differnew file mode 100644 index 0000000000..b8fa06b00e --- /dev/null +++ b/tests/data/acpi/q35/CEDT.cxl diff --git a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/q35/DSDT.cxl Binary files differnew file mode 100644 index 0000000000..c1206defed --- /dev/null +++ b/tests/data/acpi/q35/DSDT.cxl diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config deleted file mode 100644 index 9bd1a5a6fc..0000000000 --- a/tests/qemu-iotests/common.config +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2009 Red Hat, Inc. -# Copyright (c) 2000-2003,2006 Silicon Graphics, Inc. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it would be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -# all tests should use a common language setting to prevent golden -# output mismatches. -export LANG=C - -PATH=".:$PATH" - -HOSTOS=$(uname -s) -arch=$(uname -m) -[[ "$arch" =~ "ppc64" ]] && qemu_arch=ppc64 || qemu_arch="$arch" - -# make sure we have a standard umask -umask 022 - -_optstr_add() -{ - if [ -n "$1" ]; then - echo "$1,$2" - else - echo "$2" - fi -} - -# make sure this script returns success -true diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 227e0a5be9..165b54a61e 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -17,6 +17,17 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # +export LANG=C + +PATH=".:$PATH" + +HOSTOS=$(uname -s) +arch=$(uname -m) +[[ "$arch" =~ "ppc64" ]] && qemu_arch=ppc64 || qemu_arch="$arch" + +# make sure we have a standard umask +umask 022 + # bail out, setting up .notrun file _notrun() { @@ -120,18 +131,14 @@ peek_file_raw() dd if="$1" bs=1 skip="$2" count="$3" status=none } -config=common.config -test -f $config || config=../common.config -if ! test -f $config -then - echo "$0: failed to find common.config" - exit 1 -fi -if ! . $config - then - echo "$0: failed to source common.config" - exit 1 -fi +_optstr_add() +{ + if [ -n "$1" ]; then + echo "$1,$2" + else + echo "$2" + fi +} # Set the variables to the empty string to turn Valgrind off # for specific processes, e.g. diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py index aae70a8341..5a771da86e 100644 --- a/tests/qemu-iotests/testrunner.py +++ b/tests/qemu-iotests/testrunner.py @@ -361,6 +361,9 @@ class TestRunner(ContextManager['TestRunner']): starttime=start, lasttime=last_el, end = '\n' if mp else '\r') + else: + testname = os.path.basename(test) + print(f'# running {self.env.imgfmt} {testname}') res = self.do_run_test(test, mp) @@ -378,6 +381,7 @@ class TestRunner(ContextManager['TestRunner']): else: print(res.casenotrun) + sys.stdout.flush() return res def run_tests(self, tests: List[str], jobs: int = 1) -> bool: diff --git a/tests/qemu-iotests/tests/nbd-multiconn b/tests/qemu-iotests/tests/nbd-multiconn new file mode 100755 index 0000000000..b121f2e363 --- /dev/null +++ b/tests/qemu-iotests/tests/nbd-multiconn @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# group: rw auto quick +# +# Test cases for NBD multi-conn advertisement +# +# Copyright (C) 2022 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import os +from contextlib import contextmanager +import iotests +from iotests import qemu_img_create, qemu_io + + +disk = os.path.join(iotests.test_dir, 'disk') +size = '4M' +nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock') +nbd_uri = 'nbd+unix:///{}?socket=' + nbd_sock + + +@contextmanager +def open_nbd(export_name): + h = nbd.NBD() + try: + h.connect_uri(nbd_uri.format(export_name)) + yield h + finally: + h.shutdown() + +class TestNbdMulticonn(iotests.QMPTestCase): + def setUp(self): + qemu_img_create('-f', iotests.imgfmt, disk, size) + qemu_io('-c', 'w -P 1 0 2M', '-c', 'w -P 2 2M 2M', disk) + + self.vm = iotests.VM() + self.vm.launch() + result = self.vm.qmp('blockdev-add', { + 'driver': 'qcow2', + 'node-name': 'n', + 'file': {'driver': 'file', 'filename': disk} + }) + self.assert_qmp(result, 'return', {}) + + def tearDown(self): + self.vm.shutdown() + os.remove(disk) + try: + os.remove(nbd_sock) + except OSError: + pass + + @contextmanager + def run_server(self, max_connections=None): + args = { + 'addr': { + 'type': 'unix', + 'data': {'path': nbd_sock} + } + } + if max_connections is not None: + args['max-connections'] = max_connections + + result = self.vm.qmp('nbd-server-start', args) + self.assert_qmp(result, 'return', {}) + yield + + result = self.vm.qmp('nbd-server-stop') + self.assert_qmp(result, 'return', {}) + + def add_export(self, name, writable=None): + args = { + 'type': 'nbd', + 'id': name, + 'node-name': 'n', + 'name': name, + } + if writable is not None: + args['writable'] = writable + + result = self.vm.qmp('block-export-add', args) + self.assert_qmp(result, 'return', {}) + + def test_default_settings(self): + with self.run_server(): + self.add_export('r') + self.add_export('w', writable=True) + with open_nbd('r') as h: + self.assertTrue(h.can_multi_conn()) + with open_nbd('w') as h: + self.assertTrue(h.can_multi_conn()) + + def test_limited_connections(self): + with self.run_server(max_connections=1): + self.add_export('r') + self.add_export('w', writable=True) + with open_nbd('r') as h: + self.assertFalse(h.can_multi_conn()) + with open_nbd('w') as h: + self.assertFalse(h.can_multi_conn()) + + def test_parallel_writes(self): + with self.run_server(): + self.add_export('w', writable=True) + + clients = [nbd.NBD() for _ in range(3)] + for c in clients: + c.connect_uri(nbd_uri.format('w')) + self.assertTrue(c.can_multi_conn()) + + initial_data = clients[0].pread(1024 * 1024, 0) + self.assertEqual(initial_data, b'\x01' * 1024 * 1024) + + updated_data = b'\x03' * 1024 * 1024 + clients[1].pwrite(updated_data, 0) + clients[2].flush() + current_data = clients[0].pread(1024 * 1024, 0) + + self.assertEqual(updated_data, current_data) + + for i in range(3): + clients[i].shutdown() + + +if __name__ == '__main__': + try: + # Easier to use libnbd than to try and set up parallel + # 'qemu-nbd --list' or 'qemu-io' processes, but not all systems + # have libnbd installed. + import nbd # type: ignore + + iotests.main(supported_fmts=['qcow2']) + except ImportError: + iotests.notrun('libnbd not installed') diff --git a/tests/qemu-iotests/tests/nbd-multiconn.out b/tests/qemu-iotests/tests/nbd-multiconn.out new file mode 100644 index 0000000000..8d7e996700 --- /dev/null +++ b/tests/qemu-iotests/tests/nbd-multiconn.out @@ -0,0 +1,5 @@ +... +---------------------------------------------------------------------- +Ran 3 tests + +OK diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out index 0bf1abb063..9d938db24e 100644 --- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out +++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out @@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 exports available: 1 export: '' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x48f ( readonly flush fua df cache ) min block: 1 opt block: 4096 max block: 33554432 diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 5dddedabcd..a4a46e97f0 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1536,6 +1536,49 @@ static void test_acpi_q35_viot(void) free_test_data(&data); } +static void test_acpi_q35_cxl(void) +{ + gchar *tmp_path = g_dir_make_tmp("qemu-test-cxl.XXXXXX", NULL); + gchar *params; + + test_data data = { + .machine = MACHINE_Q35, + .variant = ".cxl", + }; + /* + * A complex CXL setup. + */ + params = g_strdup_printf(" -machine cxl=on" + " -object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M" + " -object memory-backend-file,id=cxl-mem2,mem-path=%s,size=256M" + " -object memory-backend-file,id=cxl-mem3,mem-path=%s,size=256M" + " -object memory-backend-file,id=cxl-mem4,mem-path=%s,size=256M" + " -object memory-backend-file,id=lsa1,mem-path=%s,size=256M" + " -object memory-backend-file,id=lsa2,mem-path=%s,size=256M" + " -object memory-backend-file,id=lsa3,mem-path=%s,size=256M" + " -object memory-backend-file,id=lsa4,mem-path=%s,size=256M" + " -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1" + " -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2" + " -device cxl-rp,port=0,bus=cxl.1,id=rp1,chassis=0,slot=2" + " -device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1" + " -device cxl-rp,port=1,bus=cxl.1,id=rp2,chassis=0,slot=3" + " -device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2" + " -device cxl-rp,port=0,bus=cxl.2,id=rp3,chassis=0,slot=5" + " -device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3" + " -device cxl-rp,port=1,bus=cxl.2,id=rp4,chassis=0,slot=6" + " -device cxl-type3,bus=rp4,memdev=cxl-mem4,lsa=lsa4" + " -cxl-fixed-memory-window targets.0=cxl.1,size=4G,interleave-granularity=8k" + " -cxl-fixed-memory-window targets.0=cxl.1,targets.1=cxl.2,size=4G,interleave-granularity=8k", + tmp_path, tmp_path, tmp_path, tmp_path, + tmp_path, tmp_path, tmp_path, tmp_path); + test_acpi_one(params, &data); + + g_free(params); + g_assert(g_rmdir(tmp_path) == 0); + g_free(tmp_path); + free_test_data(&data); +} + static void test_acpi_virt_viot(void) { test_data data = { @@ -1741,6 +1784,7 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); } qtest_add_func("acpi/q35/viot", test_acpi_q35_viot); + qtest_add_func("acpi/q35/cxl", test_acpi_q35_cxl); qtest_add_func("acpi/q35/slic", test_acpi_q35_slic); } else if (strcmp(arch, "aarch64") == 0) { if (has_tcg) { diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c new file mode 100644 index 0000000000..079011af6a --- /dev/null +++ b/tests/qtest/cxl-test.c @@ -0,0 +1,151 @@ +/* + * QTest testcase for CXL + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest-single.h" + +#define QEMU_PXB_CMD "-machine q35,cxl=on " \ + "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ + "-cxl-fixed-memory-window targets.0=cxl.0,size=4G " + +#define QEMU_2PXB_CMD "-machine q35,cxl=on " \ + "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ + "-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \ + "-cxl-fixed-memory-window targets.0=cxl.0,targets.1=cxl.1,size=4G " + +#define QEMU_RP "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " + +/* Dual ports on first pxb */ +#define QEMU_2RP "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " \ + "-device cxl-rp,id=rp1,bus=cxl.0,chassis=0,slot=1 " + +/* Dual ports on each of the pxb instances */ +#define QEMU_4RP "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " \ + "-device cxl-rp,id=rp1,bus=cxl.0,chassis=0,slot=1 " \ + "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \ + "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 " + +#define QEMU_T3D "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " + +#define QEMU_2T3D "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ + "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " + +#define QEMU_4T3D "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ + "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " \ + "-object memory-backend-file,id=cxl-mem2,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa2,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2,id=cxl-pmem2 " \ + "-object memory-backend-file,id=cxl-mem3,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa3,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3,id=cxl-pmem3 " + +static void cxl_basic_hb(void) +{ + qtest_start("-machine q35,cxl=on"); + qtest_end(); +} + +static void cxl_basic_pxb(void) +{ + qtest_start("-machine q35,cxl=on -device pxb-cxl,bus=pcie.0"); + qtest_end(); +} + +static void cxl_pxb_with_window(void) +{ + qtest_start(QEMU_PXB_CMD); + qtest_end(); +} + +static void cxl_2pxb_with_window(void) +{ + qtest_start(QEMU_2PXB_CMD); + qtest_end(); +} + +static void cxl_root_port(void) +{ + qtest_start(QEMU_PXB_CMD QEMU_RP); + qtest_end(); +} + +static void cxl_2root_port(void) +{ + qtest_start(QEMU_PXB_CMD QEMU_2RP); + qtest_end(); +} + +static void cxl_t3d(void) +{ + g_autoptr(GString) cmdline = g_string_new(NULL); + char template[] = "/tmp/cxl-test-XXXXXX"; + const char *tmpfs; + + tmpfs = mkdtemp(template); + + g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D, tmpfs, tmpfs); + + qtest_start(cmdline->str); + qtest_end(); +} + +static void cxl_1pxb_2rp_2t3d(void) +{ + g_autoptr(GString) cmdline = g_string_new(NULL); + char template[] = "/tmp/cxl-test-XXXXXX"; + const char *tmpfs; + + tmpfs = mkdtemp(template); + + g_string_printf(cmdline, QEMU_PXB_CMD QEMU_2RP QEMU_2T3D, + tmpfs, tmpfs, tmpfs, tmpfs); + + qtest_start(cmdline->str); + qtest_end(); +} + +static void cxl_2pxb_4rp_4t3d(void) +{ + g_autoptr(GString) cmdline = g_string_new(NULL); + char template[] = "/tmp/cxl-test-XXXXXX"; + const char *tmpfs; + + tmpfs = mkdtemp(template); + + g_string_printf(cmdline, QEMU_2PXB_CMD QEMU_4RP QEMU_4T3D, + tmpfs, tmpfs, tmpfs, tmpfs, tmpfs, tmpfs, + tmpfs, tmpfs); + + qtest_start(cmdline->str); + qtest_end(); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/pci/cxl/basic_hostbridge", cxl_basic_hb); + qtest_add_func("/pci/cxl/basic_pxb", cxl_basic_pxb); + qtest_add_func("/pci/cxl/pxb_with_window", cxl_pxb_with_window); + qtest_add_func("/pci/cxl/pxb_x2_with_window", cxl_2pxb_with_window); + qtest_add_func("/pci/cxl/rp", cxl_root_port); + qtest_add_func("/pci/cxl/rp_x2", cxl_2root_port); + qtest_add_func("/pci/cxl/type3_device", cxl_t3d); + qtest_add_func("/pci/cxl/rp_x2_type3_x2", cxl_1pxb_2rp_2t3d); + qtest_add_func("/pci/cxl/pxb_x2_root_port_x4_type3_x4", cxl_2pxb_4rp_4t3d); + return g_test_run(); +} diff --git a/tests/qtest/e1000e-test.c b/tests/qtest/e1000e-test.c index ddd6983ede..c98779c7c0 100644 --- a/tests/qtest/e1000e-test.c +++ b/tests/qtest/e1000e-test.c @@ -233,6 +233,12 @@ static void test_e1000e_multiple_transfers(void *obj, void *data, static void test_e1000e_hotplug(void *obj, void *data, QGuestAllocator * alloc) { QTestState *qts = global_qtest; /* TODO: get rid of global_qtest here */ + QE1000E_PCI *dev = obj; + + if (dev->pci_dev.bus->not_hotpluggable) { + g_test_skip("pci bus does not support hotplug"); + return; + } qtest_qmp_device_add(qts, "e1000e", "e1000e_net", "{'addr': '0x06'}"); qpci_unplug_acpi_device_test(qts, "e1000e_net", 0x06); diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c index 0b3c2c0d52..52ade90a7d 100644 --- a/tests/qtest/fdc-test.c +++ b/tests/qtest/fdc-test.c @@ -582,6 +582,26 @@ static void test_cve_2021_20196(void) qtest_quit(s); } +static void test_cve_2021_3507(void) +{ + QTestState *s; + + s = qtest_initf("-nographic -m 32M -nodefaults " + "-drive file=%s,format=raw,if=floppy,snapshot=on", + test_image); + qtest_outl(s, 0x9, 0x0a0206); + qtest_outw(s, 0x3f4, 0x1600); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outw(s, 0x3f4, 0x0200); + qtest_outw(s, 0x3f4, 0x0200); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outw(s, 0x3f4, 0x0000); + qtest_quit(s); +} + int main(int argc, char **argv) { int fd; @@ -613,6 +633,7 @@ int main(int argc, char **argv) qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); qtest_add_func("/fdc/fuzz-registers", fuzz_registers); qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); + qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); ret = g_test_run(); diff --git a/tests/qtest/fuzz/fuzz.h b/tests/qtest/fuzz/fuzz.h index c5f0b7227a..327c1c5a55 100644 --- a/tests/qtest/fuzz/fuzz.h +++ b/tests/qtest/fuzz/fuzz.h @@ -11,8 +11,8 @@ * */ -#ifndef FUZZER_H_ -#define FUZZER_H_ +#ifndef QTEST_FUZZ_H +#define QTEST_FUZZ_H #include "qemu/units.h" #include "qapi/error.h" @@ -122,4 +122,3 @@ int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); int LLVMFuzzerInitialize(int *argc, char ***argv, char ***envp); #endif - diff --git a/tests/qtest/libqmp.h b/tests/qtest/libqmp.h index 94aa97328a..5cb7eeaa18 100644 --- a/tests/qtest/libqmp.h +++ b/tests/qtest/libqmp.h @@ -14,8 +14,9 @@ * See the COPYING file in the top-level directory. * */ -#ifndef LIBQMP_H_ -#define LIBQMP_H_ + +#ifndef LIBQMP_H +#define LIBQMP_H #include "qapi/qmp/qdict.h" @@ -47,4 +48,4 @@ bool qmp_rsp_is_err(QDict *rsp); */ void qmp_expect_error_and_unref(QDict *rsp, const char *class); -#endif /* LIBQMP_H_ */ +#endif /* LIBQMP_H */ diff --git a/tests/qtest/libqos/arm-virt-machine.c b/tests/qtest/libqos/arm-virt-machine.c index 2e0beaefb8..139eaba142 100644 --- a/tests/qtest/libqos/arm-virt-machine.c +++ b/tests/qtest/libqos/arm-virt-machine.c @@ -22,6 +22,8 @@ #include "malloc.h" #include "qgraph.h" #include "virtio-mmio.h" +#include "generic-pcihost.h" +#include "hw/pci/pci_regs.h" #define ARM_PAGE_SIZE 4096 #define VIRTIO_MMIO_BASE_ADDR 0x0A003E00 @@ -35,6 +37,7 @@ struct QVirtMachine { QOSGraphObject obj; QGuestAllocator alloc; QVirtioMMIODevice virtio_mmio; + QGenericPCIHost bridge; }; static void virt_destructor(QOSGraphObject *obj) @@ -57,11 +60,13 @@ static void *virt_get_driver(void *object, const char *interface) static QOSGraphObject *virt_get_device(void *obj, const char *device) { QVirtMachine *machine = obj; - if (!g_strcmp0(device, "virtio-mmio")) { + if (!g_strcmp0(device, "generic-pcihost")) { + return &machine->bridge.obj; + } else if (!g_strcmp0(device, "virtio-mmio")) { return &machine->virtio_mmio.obj; } - fprintf(stderr, "%s not present in arm/virtio\n", device); + fprintf(stderr, "%s not present in arm/virt\n", device); g_assert_not_reached(); } @@ -76,16 +81,22 @@ static void *qos_create_machine_arm_virt(QTestState *qts) qvirtio_mmio_init_device(&machine->virtio_mmio, qts, VIRTIO_MMIO_BASE_ADDR, VIRTIO_MMIO_SIZE); + qos_create_generic_pcihost(&machine->bridge, qts, &machine->alloc); + machine->obj.get_device = virt_get_device; machine->obj.get_driver = virt_get_driver; machine->obj.destructor = virt_destructor; return machine; } -static void virtio_mmio_register_nodes(void) +static void virt_machine_register_nodes(void) { qos_node_create_machine("arm/virt", qos_create_machine_arm_virt); qos_node_contains("arm/virt", "virtio-mmio", NULL); + + qos_node_create_machine_args("aarch64/virt", qos_create_machine_arm_virt, + " -cpu max"); + qos_node_contains("aarch64/virt", "generic-pcihost", NULL); } -libqos_init(virtio_mmio_register_nodes); +libqos_init(virt_machine_register_nodes); diff --git a/tests/qtest/libqos/generic-pcihost.c b/tests/qtest/libqos/generic-pcihost.c new file mode 100644 index 0000000000..3124b0e46b --- /dev/null +++ b/tests/qtest/libqos/generic-pcihost.c @@ -0,0 +1,231 @@ +/* + * libqos PCI bindings for generic PCI + * + * Copyright Red Hat Inc., 2022 + * + * Authors: + * Eric Auger <eric.auger@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "../libqtest.h" +#include "generic-pcihost.h" +#include "qapi/qmp/qdict.h" +#include "hw/pci/pci_regs.h" +#include "qemu/host-utils.h" + +#include "qemu/module.h" + +/* QGenericPCIHost */ + +QOSGraphObject *generic_pcihost_get_device(void *obj, const char *device) +{ + QGenericPCIHost *host = obj; + if (!g_strcmp0(device, "pci-bus-generic")) { + return &host->pci.obj; + } + fprintf(stderr, "%s not present in generic-pcihost\n", device); + g_assert_not_reached(); +} + +void qos_create_generic_pcihost(QGenericPCIHost *host, + QTestState *qts, + QGuestAllocator *alloc) +{ + host->obj.get_device = generic_pcihost_get_device; + qpci_init_generic(&host->pci, qts, alloc, false); +} + +static uint8_t qpci_generic_pio_readb(QPCIBus *bus, uint32_t addr) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + return qtest_readb(bus->qts, s->gpex_pio_base + addr); +} + +static void qpci_generic_pio_writeb(QPCIBus *bus, uint32_t addr, uint8_t val) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + qtest_writeb(bus->qts, s->gpex_pio_base + addr, val); +} + +static uint16_t qpci_generic_pio_readw(QPCIBus *bus, uint32_t addr) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + return qtest_readw(bus->qts, s->gpex_pio_base + addr); +} + +static void qpci_generic_pio_writew(QPCIBus *bus, uint32_t addr, uint16_t val) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + qtest_writew(bus->qts, s->gpex_pio_base + addr, val); +} + +static uint32_t qpci_generic_pio_readl(QPCIBus *bus, uint32_t addr) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + return qtest_readl(bus->qts, s->gpex_pio_base + addr); +} + +static void qpci_generic_pio_writel(QPCIBus *bus, uint32_t addr, uint32_t val) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + qtest_writel(bus->qts, s->gpex_pio_base + addr, val); +} + +static uint64_t qpci_generic_pio_readq(QPCIBus *bus, uint32_t addr) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + return qtest_readq(bus->qts, s->gpex_pio_base + addr); +} + +static void qpci_generic_pio_writeq(QPCIBus *bus, uint32_t addr, uint64_t val) +{ + QGenericPCIBus *s = container_of(bus, QGenericPCIBus, bus); + + qtest_writeq(bus->qts, s->gpex_pio_base + addr, val); +} + +static void qpci_generic_memread(QPCIBus *bus, uint32_t addr, void *buf, size_t len) +{ + qtest_memread(bus->qts, addr, buf, len); +} + +static void qpci_generic_memwrite(QPCIBus *bus, uint32_t addr, + const void *buf, size_t len) +{ + qtest_memwrite(bus->qts, addr, buf, len); +} + +static uint8_t qpci_generic_config_readb(QPCIBus *bus, int devfn, uint8_t offset) +{ + QGenericPCIBus *gbus = container_of(bus, QGenericPCIBus, bus); + uint64_t addr = gbus->ecam_alloc_ptr + ((0 << 20) | (devfn << 12) | offset); + uint8_t val; + + qtest_memread(bus->qts, addr, &val, 1); + return val; +} + +static uint16_t qpci_generic_config_readw(QPCIBus *bus, int devfn, uint8_t offset) +{ + QGenericPCIBus *gbus = container_of(bus, QGenericPCIBus, bus); + uint64_t addr = gbus->ecam_alloc_ptr + ((0 << 20) | (devfn << 12) | offset); + uint16_t val; + + qtest_memread(bus->qts, addr, &val, 2); + return le16_to_cpu(val); +} + +static uint32_t qpci_generic_config_readl(QPCIBus *bus, int devfn, uint8_t offset) +{ + QGenericPCIBus *gbus = container_of(bus, QGenericPCIBus, bus); + uint64_t addr = gbus->ecam_alloc_ptr + ((0 << 20) | (devfn << 12) | offset); + uint32_t val; + + qtest_memread(bus->qts, addr, &val, 4); + return le32_to_cpu(val); +} + +static void +qpci_generic_config_writeb(QPCIBus *bus, int devfn, uint8_t offset, uint8_t value) +{ + QGenericPCIBus *gbus = container_of(bus, QGenericPCIBus, bus); + uint64_t addr = gbus->ecam_alloc_ptr + ((0 << 20) | (devfn << 12) | offset); + + qtest_memwrite(bus->qts, addr, &value, 1); +} + +static void +qpci_generic_config_writew(QPCIBus *bus, int devfn, uint8_t offset, uint16_t value) +{ + QGenericPCIBus *gbus = container_of(bus, QGenericPCIBus, bus); + uint64_t addr = gbus->ecam_alloc_ptr + ((0 << 20) | (devfn << 12) | offset); + uint16_t val = cpu_to_le16(value); + + qtest_memwrite(bus->qts, addr, &val, 2); +} + +static void +qpci_generic_config_writel(QPCIBus *bus, int devfn, uint8_t offset, uint32_t value) +{ + QGenericPCIBus *gbus = container_of(bus, QGenericPCIBus, bus); + uint64_t addr = gbus->ecam_alloc_ptr + ((0 << 20) | (devfn << 12) | offset); + uint32_t val = cpu_to_le32(value); + + qtest_memwrite(bus->qts, addr, &val, 4); +} + +static void *qpci_generic_get_driver(void *obj, const char *interface) +{ + QGenericPCIBus *qpci = obj; + if (!g_strcmp0(interface, "pci-bus")) { + return &qpci->bus; + } + fprintf(stderr, "%s not present in pci-bus-generic\n", interface); + g_assert_not_reached(); +} + +void qpci_init_generic(QGenericPCIBus *qpci, QTestState *qts, + QGuestAllocator *alloc, bool hotpluggable) +{ + assert(qts); + + qpci->gpex_pio_base = 0x3eff0000; + qpci->bus.not_hotpluggable = !hotpluggable; + qpci->bus.has_buggy_msi = false; + + qpci->bus.pio_readb = qpci_generic_pio_readb; + qpci->bus.pio_readw = qpci_generic_pio_readw; + qpci->bus.pio_readl = qpci_generic_pio_readl; + qpci->bus.pio_readq = qpci_generic_pio_readq; + + qpci->bus.pio_writeb = qpci_generic_pio_writeb; + qpci->bus.pio_writew = qpci_generic_pio_writew; + qpci->bus.pio_writel = qpci_generic_pio_writel; + qpci->bus.pio_writeq = qpci_generic_pio_writeq; + + qpci->bus.memread = qpci_generic_memread; + qpci->bus.memwrite = qpci_generic_memwrite; + + qpci->bus.config_readb = qpci_generic_config_readb; + qpci->bus.config_readw = qpci_generic_config_readw; + qpci->bus.config_readl = qpci_generic_config_readl; + + qpci->bus.config_writeb = qpci_generic_config_writeb; + qpci->bus.config_writew = qpci_generic_config_writew; + qpci->bus.config_writel = qpci_generic_config_writel; + + qpci->bus.qts = qts; + qpci->bus.pio_alloc_ptr = 0x0000; + qpci->bus.pio_limit = 0x10000; + qpci->bus.mmio_alloc_ptr = 0x10000000; + qpci->bus.mmio_limit = 0x2eff0000; + qpci->ecam_alloc_ptr = 0x4010000000; + + qpci->obj.get_driver = qpci_generic_get_driver; +} + +static void qpci_generic_register_nodes(void) +{ + qos_node_create_driver("pci-bus-generic", NULL); + qos_node_produces("pci-bus-generic", "pci-bus"); +} + +static void qpci_generic_pci_register_nodes(void) +{ + qos_node_create_driver("generic-pcihost", NULL); + qos_node_contains("generic-pcihost", "pci-bus-generic", NULL); +} + +libqos_init(qpci_generic_register_nodes); +libqos_init(qpci_generic_pci_register_nodes); diff --git a/tests/qtest/libqos/generic-pcihost.h b/tests/qtest/libqos/generic-pcihost.h new file mode 100644 index 0000000000..c693c769df --- /dev/null +++ b/tests/qtest/libqos/generic-pcihost.h @@ -0,0 +1,54 @@ +/* + * libqos Generic PCI bindings and generic pci host bridge + * + * Copyright Red Hat Inc., 2022 + * + * Authors: + * Eric Auger <eric.auger@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef LIBQOS_GENERIC_PCIHOST_H +#define LIBQOS_GENERIC_PCIHOST_H + +#include "pci.h" +#include "malloc.h" +#include "qgraph.h" + +typedef struct QGenericPCIBus { + QOSGraphObject obj; + QPCIBus bus; + uint64_t gpex_pio_base; + uint64_t ecam_alloc_ptr; +} QGenericPCIBus; + +/* + * qpci_init_generic(): + * @ret: A valid QGenericPCIBus * pointer + * @qts: The %QTestState + * @alloc: A previously initialized @alloc providing memory for @qts + * @bool: devices can be hotplugged on this bus + * + * This function initializes an already allocated + * QGenericPCIBus object. + */ +void qpci_init_generic(QGenericPCIBus *ret, QTestState *qts, + QGuestAllocator *alloc, bool hotpluggable); + +/* QGenericPCIHost */ + +typedef struct QGenericPCIHost QGenericPCIHost; + +struct QGenericPCIHost { + QOSGraphObject obj; + QGenericPCIBus pci; +}; + +QOSGraphObject *generic_pcihost_get_device(void *obj, const char *device); +void qos_create_generic_pcihost(QGenericPCIHost *host, + QTestState *qts, + QGuestAllocator *alloc); + +#endif diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build index 9f292339f9..fd5d6e5ae1 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -45,6 +45,7 @@ libqos_srcs = files( 'virtio-scsi.c', 'virtio-serial.c', 'virtio-iommu.c', + 'generic-pcihost.c', # qgraph machines: 'aarch64-xlnx-zcu102-machine.c', diff --git a/tests/qtest/libqos/pci-pc.c b/tests/qtest/libqos/pci-pc.c index e9dd5a57ec..81c2c055ca 100644 --- a/tests/qtest/libqos/pci-pc.c +++ b/tests/qtest/libqos/pci-pc.c @@ -150,6 +150,7 @@ void qpci_init_pc(QPCIBusPC *qpci, QTestState *qts, QGuestAllocator *alloc) qpci->bus.qts = qts; qpci->bus.pio_alloc_ptr = 0xc000; + qpci->bus.pio_limit = 0x10000; qpci->bus.mmio_alloc_ptr = 0xE0000000; qpci->bus.mmio_limit = 0x100000000ULL; diff --git a/tests/qtest/libqos/pci-spapr.c b/tests/qtest/libqos/pci-spapr.c index 76bf9a855d..0f1023e4a7 100644 --- a/tests/qtest/libqos/pci-spapr.c +++ b/tests/qtest/libqos/pci-spapr.c @@ -197,6 +197,7 @@ void qpci_init_spapr(QPCIBusSPAPR *qpci, QTestState *qts, qpci->bus.qts = qts; qpci->bus.pio_alloc_ptr = 0xc000; + qpci->bus.pio_limit = 0x10000; qpci->bus.mmio_alloc_ptr = qpci->mmio32.pci_base; qpci->bus.mmio_limit = qpci->mmio32.pci_base + qpci->mmio32.size; diff --git a/tests/qtest/libqos/pci.c b/tests/qtest/libqos/pci.c index 3a9076ae58..b23d72346b 100644 --- a/tests/qtest/libqos/pci.c +++ b/tests/qtest/libqos/pci.c @@ -398,44 +398,56 @@ void qpci_config_writel(QPCIDevice *dev, uint8_t offset, uint32_t value) uint8_t qpci_io_readb(QPCIDevice *dev, QPCIBar token, uint64_t off) { - if (token.addr < QPCI_PIO_LIMIT) { - return dev->bus->pio_readb(dev->bus, token.addr + off); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + return bus->pio_readb(bus, token.addr + off); } else { uint8_t val; - dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + + bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); return val; } } uint16_t qpci_io_readw(QPCIDevice *dev, QPCIBar token, uint64_t off) { - if (token.addr < QPCI_PIO_LIMIT) { - return dev->bus->pio_readw(dev->bus, token.addr + off); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + return bus->pio_readw(bus, token.addr + off); } else { uint16_t val; - dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + + bus->memread(bus, token.addr + off, &val, sizeof(val)); return le16_to_cpu(val); } } uint32_t qpci_io_readl(QPCIDevice *dev, QPCIBar token, uint64_t off) { - if (token.addr < QPCI_PIO_LIMIT) { - return dev->bus->pio_readl(dev->bus, token.addr + off); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + return bus->pio_readl(bus, token.addr + off); } else { uint32_t val; - dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + + bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); return le32_to_cpu(val); } } uint64_t qpci_io_readq(QPCIDevice *dev, QPCIBar token, uint64_t off) { - if (token.addr < QPCI_PIO_LIMIT) { - return dev->bus->pio_readq(dev->bus, token.addr + off); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + return bus->pio_readq(bus, token.addr + off); } else { uint64_t val; - dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + + bus->memread(bus, token.addr + off, &val, sizeof(val)); return le64_to_cpu(val); } } @@ -443,57 +455,65 @@ uint64_t qpci_io_readq(QPCIDevice *dev, QPCIBar token, uint64_t off) void qpci_io_writeb(QPCIDevice *dev, QPCIBar token, uint64_t off, uint8_t value) { - if (token.addr < QPCI_PIO_LIMIT) { - dev->bus->pio_writeb(dev->bus, token.addr + off, value); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + bus->pio_writeb(bus, token.addr + off, value); } else { - dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + bus->memwrite(bus, token.addr + off, &value, sizeof(value)); } } void qpci_io_writew(QPCIDevice *dev, QPCIBar token, uint64_t off, uint16_t value) { - if (token.addr < QPCI_PIO_LIMIT) { - dev->bus->pio_writew(dev->bus, token.addr + off, value); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + bus->pio_writew(bus, token.addr + off, value); } else { value = cpu_to_le16(value); - dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + bus->memwrite(bus, token.addr + off, &value, sizeof(value)); } } void qpci_io_writel(QPCIDevice *dev, QPCIBar token, uint64_t off, uint32_t value) { - if (token.addr < QPCI_PIO_LIMIT) { - dev->bus->pio_writel(dev->bus, token.addr + off, value); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + bus->pio_writel(bus, token.addr + off, value); } else { value = cpu_to_le32(value); - dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + bus->memwrite(bus, token.addr + off, &value, sizeof(value)); } } void qpci_io_writeq(QPCIDevice *dev, QPCIBar token, uint64_t off, uint64_t value) { - if (token.addr < QPCI_PIO_LIMIT) { - dev->bus->pio_writeq(dev->bus, token.addr + off, value); + QPCIBus *bus = dev->bus; + + if (token.is_io) { + bus->pio_writeq(bus, token.addr + off, value); } else { value = cpu_to_le64(value); - dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + bus->memwrite(bus, token.addr + off, &value, sizeof(value)); } } void qpci_memread(QPCIDevice *dev, QPCIBar token, uint64_t off, void *buf, size_t len) { - g_assert(token.addr >= QPCI_PIO_LIMIT); + g_assert(!token.is_io); dev->bus->memread(dev->bus, token.addr + off, buf, len); } void qpci_memwrite(QPCIDevice *dev, QPCIBar token, uint64_t off, const void *buf, size_t len) { - g_assert(token.addr >= QPCI_PIO_LIMIT); + g_assert(!token.is_io); dev->bus->memwrite(dev->bus, token.addr + off, buf, len); } @@ -534,9 +554,10 @@ QPCIBar qpci_iomap(QPCIDevice *dev, int barno, uint64_t *sizeptr) loc = QEMU_ALIGN_UP(bus->pio_alloc_ptr, size); g_assert(loc >= bus->pio_alloc_ptr); - g_assert(loc + size <= QPCI_PIO_LIMIT); /* Keep PIO below 64kiB */ + g_assert(loc + size <= bus->pio_limit); bus->pio_alloc_ptr = loc + size; + bar.is_io = true; qpci_config_writel(dev, bar_reg, loc | PCI_BASE_ADDRESS_SPACE_IO); } else { @@ -547,6 +568,7 @@ QPCIBar qpci_iomap(QPCIDevice *dev, int barno, uint64_t *sizeptr) g_assert(loc + size <= bus->mmio_limit); bus->mmio_alloc_ptr = loc + size; + bar.is_io = false; qpci_config_writel(dev, bar_reg, loc); } @@ -562,7 +584,7 @@ void qpci_iounmap(QPCIDevice *dev, QPCIBar bar) QPCIBar qpci_legacy_iomap(QPCIDevice *dev, uint16_t addr) { - QPCIBar bar = { .addr = addr }; + QPCIBar bar = { .addr = addr, .is_io = true }; return bar; } diff --git a/tests/qtest/libqos/pci.h b/tests/qtest/libqos/pci.h index e705e06598..8389614523 100644 --- a/tests/qtest/libqos/pci.h +++ b/tests/qtest/libqos/pci.h @@ -16,8 +16,6 @@ #include "../libqtest.h" #include "qgraph.h" -#define QPCI_PIO_LIMIT 0x10000 - #define QPCI_DEVFN(dev, fn) (((dev) << 3) | (fn)) typedef struct QPCIDevice QPCIDevice; @@ -51,14 +49,16 @@ struct QPCIBus { uint8_t offset, uint32_t value); QTestState *qts; - uint16_t pio_alloc_ptr; + uint64_t pio_alloc_ptr, pio_limit; uint64_t mmio_alloc_ptr, mmio_limit; bool has_buggy_msi; /* TRUE for spapr, FALSE for pci */ + bool not_hotpluggable; /* TRUE if devices cannot be hotplugged */ }; struct QPCIBar { uint64_t addr; + bool is_io; }; struct QPCIDevice diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 3551b9c946..31287a9173 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -35,6 +35,9 @@ qtests_pci = \ (config_all_devices.has_key('CONFIG_VGA') ? ['display-vga-test'] : []) + \ (config_all_devices.has_key('CONFIG_IVSHMEM_DEVICE') ? ['ivshmem-test'] : []) +qtests_cxl = \ + (config_all_devices.has_key('CONFIG_CXL') ? ['cxl-test'] : []) + qtests_i386 = \ (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ (config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \ @@ -74,6 +77,7 @@ qtests_i386 = \ slirp.found() ? ['virtio-net-failover'] : []) + \ (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ qtests_pci + \ + qtests_cxl + \ ['fdc-test', 'ide-test', 'hd-geo-test', @@ -273,13 +277,23 @@ endif tpmemu_files = ['tpm-emu.c', 'tpm-util.c', 'tpm-tests.c'] +migration_files = [files('migration-helpers.c')] +if gnutls.found() + migration_files += [files('../unit/crypto-tls-psk-helpers.c'), gnutls] + + if tasn1.found() + migration_files += [files('../unit/crypto-tls-x509-helpers.c', + '../unit/pkix_asn1_tab.c'), tasn1] + endif +endif + qtests = { 'bios-tables-test': [io, 'boot-sector.c', 'acpi-utils.c', 'tpm-emu.c'], 'cdrom-test': files('boot-sector.c'), 'dbus-vmstate-test': files('migration-helpers.c') + dbus_vmstate1, 'erst-test': files('erst-test.c'), 'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'], - 'migration-test': files('migration-helpers.c'), + 'migration-test': migration_files, 'pxe-test': files('boot-sector.c'), 'qos-test': [chardev, io, qos_test_ss.apply(config_host, strict: false).sources()], 'tpm-crb-swtpm-test': [io, tpmemu_files], diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c index 4ee26014b7..a6aa59e4e6 100644 --- a/tests/qtest/migration-helpers.c +++ b/tests/qtest/migration-helpers.c @@ -107,6 +107,19 @@ QDict *migrate_query(QTestState *who) return wait_command(who, "{ 'execute': 'query-migrate' }"); } +QDict *migrate_query_not_failed(QTestState *who) +{ + const char *status; + QDict *rsp = migrate_query(who); + status = qdict_get_str(rsp, "status"); + if (g_str_equal(status, "failed")) { + g_printerr("query-migrate shows failed migration: %s\n", + qdict_get_str(rsp, "error-desc")); + } + g_assert(!g_str_equal(status, "failed")); + return rsp; +} + /* * Note: caller is responsible to free the returned object via * g_free() after use diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h index c7872e8442..78587c2b82 100644 --- a/tests/qtest/migration-helpers.h +++ b/tests/qtest/migration-helpers.h @@ -9,8 +9,9 @@ * See the COPYING file in the top-level directory. * */ -#ifndef MIGRATION_HELPERS_H_ -#define MIGRATION_HELPERS_H_ + +#ifndef MIGRATION_HELPERS_H +#define MIGRATION_HELPERS_H #include "libqtest.h" @@ -26,6 +27,7 @@ G_GNUC_PRINTF(3, 4) void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...); QDict *migrate_query(QTestState *who); +QDict *migrate_query_not_failed(QTestState *who); void wait_for_migration_status(QTestState *who, const char *goal, const char **ungoals); @@ -34,4 +36,4 @@ void wait_for_migration_complete(QTestState *who); void wait_for_migration_fail(QTestState *from, bool allow_active); -#endif /* MIGRATION_HELPERS_H_ */ +#endif /* MIGRATION_HELPERS_H */ diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index cba6023eb5..d33e8060f9 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -23,9 +23,16 @@ #include "qapi/qapi-visit-sockets.h" #include "qapi/qobject-input-visitor.h" #include "qapi/qobject-output-visitor.h" +#include "crypto/tlscredspsk.h" #include "migration-helpers.h" #include "tests/migration/migration-test.h" +#ifdef CONFIG_GNUTLS +# include "tests/unit/crypto-tls-psk-helpers.h" +# ifdef CONFIG_TASN1 +# include "tests/unit/crypto-tls-x509-helpers.h" +# endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ /* For dirty ring test; so far only x86_64 is supported */ #if defined(__linux__) && defined(HOST_X86_64) @@ -174,7 +181,7 @@ static int64_t read_ram_property_int(QTestState *who, const char *property) QDict *rsp_return, *rsp_ram; int64_t result; - rsp_return = migrate_query(who); + rsp_return = migrate_query_not_failed(who); if (!qdict_haskey(rsp_return, "ram")) { /* Still in setup */ result = 0; @@ -191,7 +198,7 @@ static int64_t read_migrate_property_int(QTestState *who, const char *property) QDict *rsp_return; int64_t result; - rsp_return = migrate_query(who); + rsp_return = migrate_query_not_failed(who); result = qdict_get_try_int(rsp_return, property, 0); qobject_unref(rsp_return); return result; @@ -206,7 +213,7 @@ static void read_blocktime(QTestState *who) { QDict *rsp_return; - rsp_return = migrate_query(who); + rsp_return = migrate_query_not_failed(who); g_assert(qdict_haskey(rsp_return, "postcopy-blocktime")); qobject_unref(rsp_return); } @@ -640,6 +647,328 @@ static void test_migrate_end(QTestState *from, QTestState *to, bool test_dest) cleanup("dest_serial"); } +#ifdef CONFIG_GNUTLS +struct TestMigrateTLSPSKData { + char *workdir; + char *workdiralt; + char *pskfile; + char *pskfilealt; +}; + +static void * +test_migrate_tls_psk_start_common(QTestState *from, + QTestState *to, + bool mismatch) +{ + struct TestMigrateTLSPSKData *data = + g_new0(struct TestMigrateTLSPSKData, 1); + QDict *rsp; + + data->workdir = g_strdup_printf("%s/tlscredspsk0", tmpfs); + data->pskfile = g_strdup_printf("%s/%s", data->workdir, + QCRYPTO_TLS_CREDS_PSKFILE); + mkdir(data->workdir, 0700); + test_tls_psk_init(data->pskfile); + + if (mismatch) { + data->workdiralt = g_strdup_printf("%s/tlscredspskalt0", tmpfs); + data->pskfilealt = g_strdup_printf("%s/%s", data->workdiralt, + QCRYPTO_TLS_CREDS_PSKFILE); + mkdir(data->workdiralt, 0700); + test_tls_psk_init_alt(data->pskfilealt); + } + + rsp = wait_command(from, + "{ 'execute': 'object-add'," + " 'arguments': { 'qom-type': 'tls-creds-psk'," + " 'id': 'tlscredspsk0'," + " 'endpoint': 'client'," + " 'dir': %s," + " 'username': 'qemu'} }", + data->workdir); + qobject_unref(rsp); + + rsp = wait_command(to, + "{ 'execute': 'object-add'," + " 'arguments': { 'qom-type': 'tls-creds-psk'," + " 'id': 'tlscredspsk0'," + " 'endpoint': 'server'," + " 'dir': %s } }", + mismatch ? data->workdiralt : data->workdir); + qobject_unref(rsp); + + migrate_set_parameter_str(from, "tls-creds", "tlscredspsk0"); + migrate_set_parameter_str(to, "tls-creds", "tlscredspsk0"); + + return data; +} + +static void * +test_migrate_tls_psk_start_match(QTestState *from, + QTestState *to) +{ + return test_migrate_tls_psk_start_common(from, to, false); +} + +static void * +test_migrate_tls_psk_start_mismatch(QTestState *from, + QTestState *to) +{ + return test_migrate_tls_psk_start_common(from, to, true); +} + +static void +test_migrate_tls_psk_finish(QTestState *from, + QTestState *to, + void *opaque) +{ + struct TestMigrateTLSPSKData *data = opaque; + + test_tls_psk_cleanup(data->pskfile); + if (data->pskfilealt) { + test_tls_psk_cleanup(data->pskfilealt); + } + rmdir(data->workdir); + if (data->workdiralt) { + rmdir(data->workdiralt); + } + + g_free(data->workdiralt); + g_free(data->pskfilealt); + g_free(data->workdir); + g_free(data->pskfile); + g_free(data); +} + +#ifdef CONFIG_TASN1 +typedef struct { + char *workdir; + char *keyfile; + char *cacert; + char *servercert; + char *serverkey; + char *clientcert; + char *clientkey; +} TestMigrateTLSX509Data; + +typedef struct { + bool verifyclient; + bool clientcert; + bool hostileclient; + bool authzclient; + const char *certhostname; + const char *certipaddr; +} TestMigrateTLSX509; + +static void * +test_migrate_tls_x509_start_common(QTestState *from, + QTestState *to, + TestMigrateTLSX509 *args) +{ + TestMigrateTLSX509Data *data = g_new0(TestMigrateTLSX509Data, 1); + QDict *rsp; + + data->workdir = g_strdup_printf("%s/tlscredsx5090", tmpfs); + data->keyfile = g_strdup_printf("%s/key.pem", data->workdir); + + data->cacert = g_strdup_printf("%s/ca-cert.pem", data->workdir); + data->serverkey = g_strdup_printf("%s/server-key.pem", data->workdir); + data->servercert = g_strdup_printf("%s/server-cert.pem", data->workdir); + if (args->clientcert) { + data->clientkey = g_strdup_printf("%s/client-key.pem", data->workdir); + data->clientcert = g_strdup_printf("%s/client-cert.pem", data->workdir); + } + + mkdir(data->workdir, 0700); + + test_tls_init(data->keyfile); + g_assert(link(data->keyfile, data->serverkey) == 0); + if (args->clientcert) { + g_assert(link(data->keyfile, data->clientkey) == 0); + } + + TLS_ROOT_REQ_SIMPLE(cacertreq, data->cacert); + if (args->clientcert) { + TLS_CERT_REQ_SIMPLE_CLIENT(servercertreq, cacertreq, + args->hostileclient ? + QCRYPTO_TLS_TEST_CLIENT_HOSTILE_NAME : + QCRYPTO_TLS_TEST_CLIENT_NAME, + data->clientcert); + } + + TLS_CERT_REQ_SIMPLE_SERVER(clientcertreq, cacertreq, + data->servercert, + args->certhostname, + args->certipaddr); + + rsp = wait_command(from, + "{ 'execute': 'object-add'," + " 'arguments': { 'qom-type': 'tls-creds-x509'," + " 'id': 'tlscredsx509client0'," + " 'endpoint': 'client'," + " 'dir': %s," + " 'sanity-check': true," + " 'verify-peer': true} }", + data->workdir); + qobject_unref(rsp); + migrate_set_parameter_str(from, "tls-creds", "tlscredsx509client0"); + if (args->certhostname) { + migrate_set_parameter_str(from, "tls-hostname", args->certhostname); + } + + rsp = wait_command(to, + "{ 'execute': 'object-add'," + " 'arguments': { 'qom-type': 'tls-creds-x509'," + " 'id': 'tlscredsx509server0'," + " 'endpoint': 'server'," + " 'dir': %s," + " 'sanity-check': true," + " 'verify-peer': %i} }", + data->workdir, args->verifyclient); + qobject_unref(rsp); + migrate_set_parameter_str(to, "tls-creds", "tlscredsx509server0"); + + if (args->authzclient) { + rsp = wait_command(to, + "{ 'execute': 'object-add'," + " 'arguments': { 'qom-type': 'authz-simple'," + " 'id': 'tlsauthz0'," + " 'identity': %s} }", + "CN=" QCRYPTO_TLS_TEST_CLIENT_NAME); + migrate_set_parameter_str(to, "tls-authz", "tlsauthz0"); + } + + return data; +} + +/* + * The normal case: match server's cert hostname against + * whatever host we were telling QEMU to connect to (if any) + */ +static void * +test_migrate_tls_x509_start_default_host(QTestState *from, + QTestState *to) +{ + TestMigrateTLSX509 args = { + .verifyclient = true, + .clientcert = true, + .certipaddr = "127.0.0.1" + }; + return test_migrate_tls_x509_start_common(from, to, &args); +} + +/* + * The unusual case: the server's cert is different from + * the address we're telling QEMU to connect to (if any), + * so we must give QEMU an explicit hostname to validate + */ +static void * +test_migrate_tls_x509_start_override_host(QTestState *from, + QTestState *to) +{ + TestMigrateTLSX509 args = { + .verifyclient = true, + .clientcert = true, + .certhostname = "qemu.org", + }; + return test_migrate_tls_x509_start_common(from, to, &args); +} + +/* + * The unusual case: the server's cert is different from + * the address we're telling QEMU to connect to, and so we + * expect the client to reject the server + */ +static void * +test_migrate_tls_x509_start_mismatch_host(QTestState *from, + QTestState *to) +{ + TestMigrateTLSX509 args = { + .verifyclient = true, + .clientcert = true, + .certipaddr = "10.0.0.1", + }; + return test_migrate_tls_x509_start_common(from, to, &args); +} + +static void * +test_migrate_tls_x509_start_friendly_client(QTestState *from, + QTestState *to) +{ + TestMigrateTLSX509 args = { + .verifyclient = true, + .clientcert = true, + .authzclient = true, + .certipaddr = "127.0.0.1", + }; + return test_migrate_tls_x509_start_common(from, to, &args); +} + +static void * +test_migrate_tls_x509_start_hostile_client(QTestState *from, + QTestState *to) +{ + TestMigrateTLSX509 args = { + .verifyclient = true, + .clientcert = true, + .hostileclient = true, + .authzclient = true, + .certipaddr = "127.0.0.1", + }; + return test_migrate_tls_x509_start_common(from, to, &args); +} + +/* + * The case with no client certificate presented, + * and no server verification + */ +static void * +test_migrate_tls_x509_start_allow_anon_client(QTestState *from, + QTestState *to) +{ + TestMigrateTLSX509 args = { + .certipaddr = "127.0.0.1", + }; + return test_migrate_tls_x509_start_common(from, to, &args); +} + +/* + * The case with no client certificate presented, + * and server verification rejecting + */ +static void * +test_migrate_tls_x509_start_reject_anon_client(QTestState *from, + QTestState *to) +{ + TestMigrateTLSX509 args = { + .verifyclient = true, + .certipaddr = "127.0.0.1", + }; + return test_migrate_tls_x509_start_common(from, to, &args); +} + +static void +test_migrate_tls_x509_finish(QTestState *from, + QTestState *to, + void *opaque) +{ + TestMigrateTLSX509Data *data = opaque; + + test_tls_cleanup(data->keyfile); + unlink(data->cacert); + unlink(data->servercert); + unlink(data->serverkey); + unlink(data->clientcert); + unlink(data->clientkey); + rmdir(data->workdir); + + g_free(data->workdir); + g_free(data->keyfile); + g_free(data); +} +#endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ + static int migrate_postcopy_prepare(QTestState **from_ptr, QTestState **to_ptr, MigrateStart *args) @@ -845,6 +1174,9 @@ typedef struct { /* This test should fail, dest qemu should fail with abnormal status */ MIG_TEST_FAIL_DEST_QUIT_ERR, } result; + + /* Optional: set number of migration passes to wait for */ + unsigned int iterations; } MigrateCommon; static void test_precopy_common(MigrateCommon *args) @@ -890,7 +1222,13 @@ static void test_precopy_common(MigrateCommon *args) qtest_set_expected_status(to, 1); } } else { - wait_for_migration_pass(from); + if (args->iterations) { + while (args->iterations--) { + wait_for_migration_pass(from); + } + } else { + wait_for_migration_pass(from); + } migrate_set_parameter_int(from, "downtime-limit", CONVERGE_DOWNTIME); @@ -911,7 +1249,7 @@ static void test_precopy_common(MigrateCommon *args) test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED); } -static void test_precopy_unix(void) +static void test_precopy_unix_plain(void) { g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); MigrateCommon args = { @@ -922,6 +1260,7 @@ static void test_precopy_unix(void) test_precopy_common(&args); } + static void test_precopy_unix_dirty_ring(void) { g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); @@ -936,6 +1275,53 @@ static void test_precopy_unix_dirty_ring(void) test_precopy_common(&args); } +#ifdef CONFIG_GNUTLS +static void test_precopy_unix_tls_psk(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .connect_uri = uri, + .listen_uri = uri, + .start_hook = test_migrate_tls_psk_start_match, + .finish_hook = test_migrate_tls_psk_finish, + }; + + test_precopy_common(&args); +} + +#ifdef CONFIG_TASN1 +static void test_precopy_unix_tls_x509_default_host(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .start = { + .hide_stderr = true, + }, + .connect_uri = uri, + .listen_uri = uri, + .start_hook = test_migrate_tls_x509_start_default_host, + .finish_hook = test_migrate_tls_x509_finish, + .result = MIG_TEST_FAIL_DEST_QUIT_ERR, + }; + + test_precopy_common(&args); +} + +static void test_precopy_unix_tls_x509_override_host(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .connect_uri = uri, + .listen_uri = uri, + .start_hook = test_migrate_tls_x509_start_override_host, + .finish_hook = test_migrate_tls_x509_finish, + }; + + test_precopy_common(&args); +} +#endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ + #if 0 /* Currently upset on aarch64 TCG */ static void test_ignore_shared(void) @@ -973,68 +1359,161 @@ static void test_ignore_shared(void) } #endif -static void test_xbzrle(const char *uri) +static void * +test_migrate_xbzrle_start(QTestState *from, + QTestState *to) { - MigrateStart args = {}; - QTestState *from, *to; - - if (test_migrate_start(&from, &to, uri, &args)) { - return; - } - - /* - * We want to pick a speed slow enough that the test completes - * quickly, but that it doesn't complete precopy even on a slow - * machine, so also set the downtime. - */ - /* 1 ms should make it not converge*/ - migrate_set_parameter_int(from, "downtime-limit", 1); - /* 1GB/s */ - migrate_set_parameter_int(from, "max-bandwidth", 1000000000); - migrate_set_parameter_int(from, "xbzrle-cache-size", 33554432); migrate_set_capability(from, "xbzrle", true); migrate_set_capability(to, "xbzrle", true); - /* Wait for the first serial output from the source */ - wait_for_serial("src_serial"); - migrate_qmp(from, uri, "{}"); + return NULL; +} - wait_for_migration_pass(from); - /* Make sure we have 2 passes, so the xbzrle cache gets a workout */ - wait_for_migration_pass(from); +static void test_precopy_unix_xbzrle(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .connect_uri = uri, + .listen_uri = uri, - /* 1000ms should converge */ - migrate_set_parameter_int(from, "downtime-limit", 1000); + .start_hook = test_migrate_xbzrle_start, - if (!got_stop) { - qtest_qmp_eventwait(from, "STOP"); - } - qtest_qmp_eventwait(to, "RESUME"); + .iterations = 2, + }; - wait_for_serial("dest_serial"); - wait_for_migration_complete(from); + test_precopy_common(&args); +} - test_migrate_end(from, to, true); +static void test_precopy_tcp_plain(void) +{ + MigrateCommon args = { + .listen_uri = "tcp:127.0.0.1:0", + }; + + test_precopy_common(&args); } -static void test_xbzrle_unix(void) +#ifdef CONFIG_GNUTLS +static void test_precopy_tcp_tls_psk_match(void) { - g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_psk_start_match, + .finish_hook = test_migrate_tls_psk_finish, + }; - test_xbzrle(uri); + test_precopy_common(&args); } -static void test_precopy_tcp(void) +static void test_precopy_tcp_tls_psk_mismatch(void) { MigrateCommon args = { + .start = { + .hide_stderr = true, + }, .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_psk_start_mismatch, + .finish_hook = test_migrate_tls_psk_finish, + .result = MIG_TEST_FAIL, }; test_precopy_common(&args); } +#ifdef CONFIG_TASN1 +static void test_precopy_tcp_tls_x509_default_host(void) +{ + MigrateCommon args = { + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_x509_start_default_host, + .finish_hook = test_migrate_tls_x509_finish, + }; + + test_precopy_common(&args); +} + +static void test_precopy_tcp_tls_x509_override_host(void) +{ + MigrateCommon args = { + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_x509_start_override_host, + .finish_hook = test_migrate_tls_x509_finish, + }; + + test_precopy_common(&args); +} + +static void test_precopy_tcp_tls_x509_mismatch_host(void) +{ + MigrateCommon args = { + .start = { + .hide_stderr = true, + }, + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_x509_start_mismatch_host, + .finish_hook = test_migrate_tls_x509_finish, + .result = MIG_TEST_FAIL_DEST_QUIT_ERR, + }; + + test_precopy_common(&args); +} + +static void test_precopy_tcp_tls_x509_friendly_client(void) +{ + MigrateCommon args = { + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_x509_start_friendly_client, + .finish_hook = test_migrate_tls_x509_finish, + }; + + test_precopy_common(&args); +} + +static void test_precopy_tcp_tls_x509_hostile_client(void) +{ + MigrateCommon args = { + .start = { + .hide_stderr = true, + }, + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_x509_start_hostile_client, + .finish_hook = test_migrate_tls_x509_finish, + .result = MIG_TEST_FAIL, + }; + + test_precopy_common(&args); +} + +static void test_precopy_tcp_tls_x509_allow_anon_client(void) +{ + MigrateCommon args = { + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_x509_start_allow_anon_client, + .finish_hook = test_migrate_tls_x509_finish, + }; + + test_precopy_common(&args); +} + +static void test_precopy_tcp_tls_x509_reject_anon_client(void) +{ + MigrateCommon args = { + .start = { + .hide_stderr = true, + }, + .listen_uri = "tcp:127.0.0.1:0", + .start_hook = test_migrate_tls_x509_start_reject_anon_client, + .finish_hook = test_migrate_tls_x509_finish, + .result = MIG_TEST_FAIL, + }; + + test_precopy_common(&args); +} +#endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ + static void *test_migrate_fd_start_hook(QTestState *from, QTestState *to) { @@ -1261,26 +1740,12 @@ static void test_migrate_auto_converge(void) test_migrate_end(from, to, true); } -static void test_multifd_tcp(const char *method) +static void * +test_migrate_precopy_tcp_multifd_start_common(QTestState *from, + QTestState *to, + const char *method) { - MigrateStart args = {}; - QTestState *from, *to; QDict *rsp; - g_autofree char *uri = NULL; - - if (test_migrate_start(&from, &to, "defer", &args)) { - return; - } - - /* - * We want to pick a speed slow enough that the test completes - * quickly, but that it doesn't complete precopy even on a slow - * machine, so also set the downtime. - */ - /* 1 ms should make it not converge*/ - migrate_set_parameter_int(from, "downtime-limit", 1); - /* 1GB/s */ - migrate_set_parameter_int(from, "max-bandwidth", 1000000000); migrate_set_parameter_int(from, "multifd-channels", 16); migrate_set_parameter_int(to, "multifd-channels", 16); @@ -1296,44 +1761,218 @@ static void test_multifd_tcp(const char *method) " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}"); qobject_unref(rsp); - /* Wait for the first serial output from the source */ - wait_for_serial("src_serial"); - - uri = migrate_get_socket_address(to, "socket-address"); - - migrate_qmp(from, uri, "{}"); - - wait_for_migration_pass(from); + return NULL; +} - migrate_set_parameter_int(from, "downtime-limit", CONVERGE_DOWNTIME); +static void * +test_migrate_precopy_tcp_multifd_start(QTestState *from, + QTestState *to) +{ + return test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); +} - if (!got_stop) { - qtest_qmp_eventwait(from, "STOP"); - } - qtest_qmp_eventwait(to, "RESUME"); +static void * +test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from, + QTestState *to) +{ + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zlib"); +} - wait_for_serial("dest_serial"); - wait_for_migration_complete(from); - test_migrate_end(from, to, true); +#ifdef CONFIG_ZSTD +static void * +test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, + QTestState *to) +{ + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zstd"); } +#endif /* CONFIG_ZSTD */ static void test_multifd_tcp_none(void) { - test_multifd_tcp("none"); + MigrateCommon args = { + .listen_uri = "defer", + .start_hook = test_migrate_precopy_tcp_multifd_start, + }; + test_precopy_common(&args); } static void test_multifd_tcp_zlib(void) { - test_multifd_tcp("zlib"); + MigrateCommon args = { + .listen_uri = "defer", + .start_hook = test_migrate_precopy_tcp_multifd_zlib_start, + }; + test_precopy_common(&args); } #ifdef CONFIG_ZSTD static void test_multifd_tcp_zstd(void) { - test_multifd_tcp("zstd"); + MigrateCommon args = { + .listen_uri = "defer", + .start_hook = test_migrate_precopy_tcp_multifd_zstd_start, + }; + test_precopy_common(&args); } #endif +#ifdef CONFIG_GNUTLS +static void * +test_migrate_multifd_tcp_tls_psk_start_match(QTestState *from, + QTestState *to) +{ + test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); + return test_migrate_tls_psk_start_match(from, to); +} + +static void * +test_migrate_multifd_tcp_tls_psk_start_mismatch(QTestState *from, + QTestState *to) +{ + test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); + return test_migrate_tls_psk_start_mismatch(from, to); +} + +#ifdef CONFIG_TASN1 +static void * +test_migrate_multifd_tls_x509_start_default_host(QTestState *from, + QTestState *to) +{ + test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); + return test_migrate_tls_x509_start_default_host(from, to); +} + +static void * +test_migrate_multifd_tls_x509_start_override_host(QTestState *from, + QTestState *to) +{ + test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); + return test_migrate_tls_x509_start_override_host(from, to); +} + +static void * +test_migrate_multifd_tls_x509_start_mismatch_host(QTestState *from, + QTestState *to) +{ + test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); + return test_migrate_tls_x509_start_mismatch_host(from, to); +} + +static void * +test_migrate_multifd_tls_x509_start_allow_anon_client(QTestState *from, + QTestState *to) +{ + test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); + return test_migrate_tls_x509_start_allow_anon_client(from, to); +} + +static void * +test_migrate_multifd_tls_x509_start_reject_anon_client(QTestState *from, + QTestState *to) +{ + test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); + return test_migrate_tls_x509_start_reject_anon_client(from, to); +} +#endif /* CONFIG_TASN1 */ + +static void test_multifd_tcp_tls_psk_match(void) +{ + MigrateCommon args = { + .listen_uri = "defer", + .start_hook = test_migrate_multifd_tcp_tls_psk_start_match, + .finish_hook = test_migrate_tls_psk_finish, + }; + test_precopy_common(&args); +} + +static void test_multifd_tcp_tls_psk_mismatch(void) +{ + MigrateCommon args = { + .start = { + .hide_stderr = true, + }, + .listen_uri = "defer", + .start_hook = test_migrate_multifd_tcp_tls_psk_start_mismatch, + .finish_hook = test_migrate_tls_psk_finish, + .result = MIG_TEST_FAIL, + }; + test_precopy_common(&args); +} + +#ifdef CONFIG_TASN1 +static void test_multifd_tcp_tls_x509_default_host(void) +{ + MigrateCommon args = { + .listen_uri = "defer", + .start_hook = test_migrate_multifd_tls_x509_start_default_host, + .finish_hook = test_migrate_tls_x509_finish, + }; + test_precopy_common(&args); +} + +static void test_multifd_tcp_tls_x509_override_host(void) +{ + MigrateCommon args = { + .listen_uri = "defer", + .start_hook = test_migrate_multifd_tls_x509_start_override_host, + .finish_hook = test_migrate_tls_x509_finish, + }; + test_precopy_common(&args); +} + +static void test_multifd_tcp_tls_x509_mismatch_host(void) +{ + /* + * This has different behaviour to the non-multifd case. + * + * In non-multifd case when client aborts due to mismatched + * cert host, the server has already started trying to load + * migration state, and so it exits with I/O failure. + * + * In multifd case when client aborts due to mismatched + * cert host, the server is still waiting for the other + * multifd connections to arrive so hasn't started trying + * to load migration state, and thus just aborts the migration + * without exiting. + */ + MigrateCommon args = { + .start = { + .hide_stderr = true, + }, + .listen_uri = "defer", + .start_hook = test_migrate_multifd_tls_x509_start_mismatch_host, + .finish_hook = test_migrate_tls_x509_finish, + .result = MIG_TEST_FAIL, + }; + test_precopy_common(&args); +} + +static void test_multifd_tcp_tls_x509_allow_anon_client(void) +{ + MigrateCommon args = { + .listen_uri = "defer", + .start_hook = test_migrate_multifd_tls_x509_start_allow_anon_client, + .finish_hook = test_migrate_tls_x509_finish, + }; + test_precopy_common(&args); +} + +static void test_multifd_tcp_tls_x509_reject_anon_client(void) +{ + MigrateCommon args = { + .start = { + .hide_stderr = true, + }, + .listen_uri = "defer", + .start_hook = test_migrate_multifd_tls_x509_start_reject_anon_client, + .finish_hook = test_migrate_tls_x509_finish, + .result = MIG_TEST_FAIL, + }; + test_precopy_common(&args); +} +#endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ + /* * This test does: * source target @@ -1497,10 +2136,44 @@ int main(int argc, char **argv) qtest_add_func("/migration/postcopy/unix", test_postcopy); qtest_add_func("/migration/postcopy/recovery", test_postcopy_recovery); qtest_add_func("/migration/bad_dest", test_baddest); - qtest_add_func("/migration/precopy/unix", test_precopy_unix); - qtest_add_func("/migration/precopy/tcp", test_precopy_tcp); + qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain); + qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle); +#ifdef CONFIG_GNUTLS + qtest_add_func("/migration/precopy/unix/tls/psk", + test_precopy_unix_tls_psk); +#ifdef CONFIG_TASN1 + qtest_add_func("/migration/precopy/unix/tls/x509/default-host", + test_precopy_unix_tls_x509_default_host); + qtest_add_func("/migration/precopy/unix/tls/x509/override-host", + test_precopy_unix_tls_x509_override_host); +#endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ + + qtest_add_func("/migration/precopy/tcp/plain", test_precopy_tcp_plain); +#ifdef CONFIG_GNUTLS + qtest_add_func("/migration/precopy/tcp/tls/psk/match", + test_precopy_tcp_tls_psk_match); + qtest_add_func("/migration/precopy/tcp/tls/psk/mismatch", + test_precopy_tcp_tls_psk_mismatch); +#ifdef CONFIG_TASN1 + qtest_add_func("/migration/precopy/tcp/tls/x509/default-host", + test_precopy_tcp_tls_x509_default_host); + qtest_add_func("/migration/precopy/tcp/tls/x509/override-host", + test_precopy_tcp_tls_x509_override_host); + qtest_add_func("/migration/precopy/tcp/tls/x509/mismatch-host", + test_precopy_tcp_tls_x509_mismatch_host); + qtest_add_func("/migration/precopy/tcp/tls/x509/friendly-client", + test_precopy_tcp_tls_x509_friendly_client); + qtest_add_func("/migration/precopy/tcp/tls/x509/hostile-client", + test_precopy_tcp_tls_x509_hostile_client); + qtest_add_func("/migration/precopy/tcp/tls/x509/allow-anon-client", + test_precopy_tcp_tls_x509_allow_anon_client); + qtest_add_func("/migration/precopy/tcp/tls/x509/reject-anon-client", + test_precopy_tcp_tls_x509_reject_anon_client); +#endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ + /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */ - qtest_add_func("/migration/xbzrle/unix", test_xbzrle_unix); qtest_add_func("/migration/fd_proto", test_migrate_fd_proto); qtest_add_func("/migration/validate_uuid", test_validate_uuid); qtest_add_func("/migration/validate_uuid_error", test_validate_uuid_error); @@ -1510,12 +2183,34 @@ int main(int argc, char **argv) test_validate_uuid_dst_not_set); qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); - qtest_add_func("/migration/multifd/tcp/none", test_multifd_tcp_none); - qtest_add_func("/migration/multifd/tcp/cancel", test_multifd_tcp_cancel); - qtest_add_func("/migration/multifd/tcp/zlib", test_multifd_tcp_zlib); + qtest_add_func("/migration/multifd/tcp/plain/none", + test_multifd_tcp_none); + qtest_add_func("/migration/multifd/tcp/plain/cancel", + test_multifd_tcp_cancel); + qtest_add_func("/migration/multifd/tcp/plain/zlib", + test_multifd_tcp_zlib); #ifdef CONFIG_ZSTD - qtest_add_func("/migration/multifd/tcp/zstd", test_multifd_tcp_zstd); + qtest_add_func("/migration/multifd/tcp/plain/zstd", + test_multifd_tcp_zstd); #endif +#ifdef CONFIG_GNUTLS + qtest_add_func("/migration/multifd/tcp/tls/psk/match", + test_multifd_tcp_tls_psk_match); + qtest_add_func("/migration/multifd/tcp/tls/psk/mismatch", + test_multifd_tcp_tls_psk_mismatch); +#ifdef CONFIG_TASN1 + qtest_add_func("/migration/multifd/tcp/tls/x509/default-host", + test_multifd_tcp_tls_x509_default_host); + qtest_add_func("/migration/multifd/tcp/tls/x509/override-host", + test_multifd_tcp_tls_x509_override_host); + qtest_add_func("/migration/multifd/tcp/tls/x509/mismatch-host", + test_multifd_tcp_tls_x509_mismatch_host); + qtest_add_func("/migration/multifd/tcp/tls/x509/allow-anon-client", + test_multifd_tcp_tls_x509_allow_anon_client); + qtest_add_func("/migration/multifd/tcp/tls/x509/reject-anon-client", + test_multifd_tcp_tls_x509_reject_anon_client); +#endif /* CONFIG_TASN1 */ +#endif /* CONFIG_GNUTLS */ if (kvm_dirty_ring_supported()) { qtest_add_func("/migration/dirty_ring", diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c index 659b5050d8..a81c2a2715 100644 --- a/tests/qtest/vhost-user-blk-test.c +++ b/tests/qtest/vhost-user-blk-test.c @@ -676,6 +676,11 @@ static void pci_hotplug(void *obj, void *data, QGuestAllocator *t_alloc) QVirtioPCIDevice *dev; QTestState *qts = dev1->pdev->bus->qts; + if (dev1->pdev->bus->not_hotpluggable) { + g_test_skip("pci bus does not support hotplug"); + return; + } + /* plug secondary disk */ qtest_qmp_device_add(qts, "vhost-user-blk-pci", "drv1", "{'addr': %s, 'chardev': 'char2'}", @@ -703,6 +708,11 @@ static void multiqueue(void *obj, void *data, QGuestAllocator *t_alloc) uint64_t features; uint16_t num_queues; + if (pdev1->pdev->bus->not_hotpluggable) { + g_test_skip("bus pci.0 does not support hotplug"); + return; + } + /* * The primary device has 1 queue and VIRTIO_BLK_F_MQ is not enabled. The * VIRTIO specification allows VIRTIO_BLK_F_MQ to be enabled when there is diff --git a/tests/qtest/virtio-blk-test.c b/tests/qtest/virtio-blk-test.c index f22594a1a8..dc5eed31c8 100644 --- a/tests/qtest/virtio-blk-test.c +++ b/tests/qtest/virtio-blk-test.c @@ -701,6 +701,11 @@ static void pci_hotplug(void *obj, void *data, QGuestAllocator *t_alloc) QVirtioPCIDevice *dev; QTestState *qts = dev1->pdev->bus->qts; + if (dev1->pdev->bus->not_hotpluggable) { + g_test_skip("pci bus does not support hotplug"); + return; + } + /* plug secondary disk */ qtest_qmp_device_add(qts, "virtio-blk-pci", "drv1", "{'addr': %s, 'drive': 'drive1'}", diff --git a/tests/qtest/virtio-net-test.c b/tests/qtest/virtio-net-test.c index fc9f2b9498..6ded252901 100644 --- a/tests/qtest/virtio-net-test.c +++ b/tests/qtest/virtio-net-test.c @@ -173,6 +173,11 @@ static void hotplug(void *obj, void *data, QGuestAllocator *t_alloc) QTestState *qts = dev->pdev->bus->qts; const char *arch = qtest_get_arch(); + if (dev->pdev->bus->not_hotpluggable) { + g_test_skip("pci bus does not support hotplug"); + return; + } + qtest_qmp_device_add(qts, "virtio-net-pci", "net1", "{'addr': %s}", stringify(PCI_SLOT_HP)); diff --git a/tests/qtest/virtio-rng-test.c b/tests/qtest/virtio-rng-test.c index 092ba13068..64e131cd8c 100644 --- a/tests/qtest/virtio-rng-test.c +++ b/tests/qtest/virtio-rng-test.c @@ -20,6 +20,11 @@ static void rng_hotplug(void *obj, void *data, QGuestAllocator *alloc) QVirtioPCIDevice *dev = obj; QTestState *qts = dev->pdev->bus->qts; + if (dev->pdev->bus->not_hotpluggable) { + g_test_skip("pci bus does not support hotplug"); + return; + } + const char *arch = qtest_get_arch(); qtest_qmp_device_add(qts, "virtio-rng-pci", "rng1", diff --git a/tests/unit/crypto-tls-psk-helpers.c b/tests/unit/crypto-tls-psk-helpers.c index 4bea7c6fa2..511e08cc9c 100644 --- a/tests/unit/crypto-tls-psk-helpers.c +++ b/tests/unit/crypto-tls-psk-helpers.c @@ -24,7 +24,8 @@ #include "crypto-tls-psk-helpers.h" #include "qemu/sockets.h" -void test_tls_psk_init(const char *pskfile) +static void +test_tls_psk_init_common(const char *pskfile, const char *user, const char *key) { FILE *fp; @@ -33,11 +34,22 @@ void test_tls_psk_init(const char *pskfile) g_critical("Failed to create pskfile %s: %s", pskfile, strerror(errno)); abort(); } - /* Don't do this in real applications! Use psktool. */ - fprintf(fp, "qemu:009d5638c40fde0c\n"); + fprintf(fp, "%s:%s\n", user, key); fclose(fp); } +void test_tls_psk_init(const char *pskfile) +{ + /* Don't hard code a key like this in real applications! Use psktool. */ + test_tls_psk_init_common(pskfile, "qemu", "009d5638c40fde0c"); +} + +void test_tls_psk_init_alt(const char *pskfile) +{ + /* Don't hard code a key like this in real applications! Use psktool. */ + test_tls_psk_init_common(pskfile, "qemu", "10ffa6a2c42f0388"); +} + void test_tls_psk_cleanup(const char *pskfile) { unlink(pskfile); diff --git a/tests/unit/crypto-tls-psk-helpers.h b/tests/unit/crypto-tls-psk-helpers.h index faa645c629..67f8bdda71 100644 --- a/tests/unit/crypto-tls-psk-helpers.h +++ b/tests/unit/crypto-tls-psk-helpers.h @@ -24,6 +24,7 @@ #include <gnutls/gnutls.h> void test_tls_psk_init(const char *keyfile); +void test_tls_psk_init_alt(const char *keyfile); void test_tls_psk_cleanup(const char *keyfile); #endif diff --git a/tests/unit/crypto-tls-x509-helpers.c b/tests/unit/crypto-tls-x509-helpers.c index fc609b3fd4..e9937f60d8 100644 --- a/tests/unit/crypto-tls-x509-helpers.c +++ b/tests/unit/crypto-tls-x509-helpers.c @@ -168,9 +168,19 @@ test_tls_get_ipaddr(const char *addrstr, hints.ai_flags = AI_NUMERICHOST; g_assert(getaddrinfo(addrstr, NULL, &hints, &res) == 0); - *datalen = res->ai_addrlen; - *data = g_new(char, *datalen); - memcpy(*data, res->ai_addr, *datalen); + if (res->ai_family == AF_INET) { + struct sockaddr_in *in = (struct sockaddr_in *)res->ai_addr; + *datalen = sizeof(in->sin_addr); + *data = g_new(char, *datalen); + memcpy(*data, &in->sin_addr, *datalen); + } else if (res->ai_family == AF_INET6) { + struct sockaddr_in6 *in = (struct sockaddr_in6 *)res->ai_addr; + *datalen = sizeof(in->sin6_addr); + *data = g_new(char, *datalen); + memcpy(*data, &in->sin6_addr, *datalen); + } else { + g_assert_not_reached(); + } freeaddrinfo(res); } diff --git a/tests/unit/crypto-tls-x509-helpers.h b/tests/unit/crypto-tls-x509-helpers.h index cf6329e653..247e7160eb 100644 --- a/tests/unit/crypto-tls-x509-helpers.h +++ b/tests/unit/crypto-tls-x509-helpers.h @@ -26,6 +26,9 @@ #include <libtasn1.h> +#define QCRYPTO_TLS_TEST_CLIENT_NAME "ACME QEMU Client" +#define QCRYPTO_TLS_TEST_CLIENT_HOSTILE_NAME "ACME Hostile Client" + /* * This contains parameter about how to generate * certificates. @@ -118,6 +121,56 @@ void test_tls_cleanup(const char *keyfile); }; \ test_tls_generate_cert(&varname, NULL) +# define TLS_ROOT_REQ_SIMPLE(varname, fname) \ + QCryptoTLSTestCertReq varname = { \ + .filename = fname, \ + .cn = "qemu-CA", \ + .basicConstraintsEnable = true, \ + .basicConstraintsCritical = true, \ + .basicConstraintsIsCA = true, \ + .keyUsageEnable = true, \ + .keyUsageCritical = true, \ + .keyUsageValue = GNUTLS_KEY_KEY_CERT_SIGN, \ + }; \ + test_tls_generate_cert(&varname, NULL) + +# define TLS_CERT_REQ_SIMPLE_CLIENT(varname, cavarname, cname, fname) \ + QCryptoTLSTestCertReq varname = { \ + .filename = fname, \ + .cn = cname, \ + .basicConstraintsEnable = true, \ + .basicConstraintsCritical = true, \ + .basicConstraintsIsCA = false, \ + .keyUsageEnable = true, \ + .keyUsageCritical = true, \ + .keyUsageValue = \ + GNUTLS_KEY_DIGITAL_SIGNATURE | GNUTLS_KEY_KEY_ENCIPHERMENT, \ + .keyPurposeEnable = true, \ + .keyPurposeCritical = true, \ + .keyPurposeOID1 = GNUTLS_KP_TLS_WWW_CLIENT, \ + }; \ + test_tls_generate_cert(&varname, cavarname.crt) + +# define TLS_CERT_REQ_SIMPLE_SERVER(varname, cavarname, fname, \ + hostname, ipaddr) \ + QCryptoTLSTestCertReq varname = { \ + .filename = fname, \ + .cn = hostname ? hostname : ipaddr, \ + .altname1 = hostname, \ + .ipaddr1 = ipaddr, \ + .basicConstraintsEnable = true, \ + .basicConstraintsCritical = true, \ + .basicConstraintsIsCA = false, \ + .keyUsageEnable = true, \ + .keyUsageCritical = true, \ + .keyUsageValue = \ + GNUTLS_KEY_DIGITAL_SIGNATURE | GNUTLS_KEY_KEY_ENCIPHERMENT, \ + .keyPurposeEnable = true, \ + .keyPurposeCritical = true, \ + .keyPurposeOID1 = GNUTLS_KP_TLS_WWW_SERVER, \ + }; \ + test_tls_generate_cert(&varname, cavarname.crt) + extern const asn1_static_node pkix_asn1_tab[]; #endif diff --git a/tests/unit/test-crypto-tlssession.c b/tests/unit/test-crypto-tlssession.c index a266dc32da..f222959d36 100644 --- a/tests/unit/test-crypto-tlssession.c +++ b/tests/unit/test-crypto-tlssession.c @@ -512,12 +512,19 @@ int main(int argc, char **argv) false, true, "wiki.qemu.org", NULL); TEST_SESS_REG(altname4, cacertreq.filename, + servercertalt1req.filename, clientcertreq.filename, + false, false, "192.168.122.1", NULL); + TEST_SESS_REG(altname5, cacertreq.filename, + servercertalt1req.filename, clientcertreq.filename, + false, false, "fec0::dead:beaf", NULL); + + TEST_SESS_REG(altname6, cacertreq.filename, servercertalt2req.filename, clientcertreq.filename, false, true, "qemu.org", NULL); - TEST_SESS_REG(altname5, cacertreq.filename, + TEST_SESS_REG(altname7, cacertreq.filename, servercertalt2req.filename, clientcertreq.filename, false, false, "www.qemu.org", NULL); - TEST_SESS_REG(altname6, cacertreq.filename, + TEST_SESS_REG(altname8, cacertreq.filename, servercertalt2req.filename, clientcertreq.filename, false, false, "wiki.qemu.org", NULL); diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c index c49eec1f03..6713886d02 100644 --- a/tests/unit/test-io-channel-socket.c +++ b/tests/unit/test-io-channel-socket.c @@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) G_N_ELEMENTS(iosend), fdsend, G_N_ELEMENTS(fdsend), + 0, &error_abort); qio_channel_readv_full(dst, diff --git a/tools/virtiofsd/passthrough_seccomp.h b/tools/virtiofsd/passthrough_seccomp.h index a3ab073f08..12674fc050 100644 --- a/tools/virtiofsd/passthrough_seccomp.h +++ b/tools/virtiofsd/passthrough_seccomp.h @@ -6,10 +6,9 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef VIRTIOFSD_SECCOMP_H -#define VIRTIOFSD_SECCOMP_H - +#ifndef VIRTIOFSD_PASSTHROUGH_SECCOMP_H +#define VIRTIOFSD_PASSTHROUGH_SECCOMP_H void setup_seccomp(bool enable_syslog); -#endif /* VIRTIOFSD_SECCOMP_H */ +#endif /* VIRTIOFSD_PASSTHROUGH_SECCOMP_H */ diff --git a/ui/console.c b/ui/console.c index 15d0f6affd..36c80cd1de 100644 --- a/ui/console.c +++ b/ui/console.c @@ -221,7 +221,7 @@ static void gui_setup_refresh(DisplayState *ds) void graphic_hw_update_done(QemuConsole *con) { if (con) { - qemu_co_queue_restart_all(&con->dump_queue); + qemu_co_enter_all(&con->dump_queue, NULL); } } @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef UI_DBUS_H_ -#define UI_DBUS_H_ + +#ifndef UI_DBUS_H +#define UI_DBUS_H #include "chardev/char-socket.h" #include "qemu/dbus.h" @@ -144,4 +145,4 @@ void dbus_chardev_init(DBusDisplay *dpy); void dbus_clipboard_init(DBusDisplay *dpy); -#endif /* UI_DBUS_H_ */ +#endif /* UI_DBUS_H */ diff --git a/ui/meson.build b/ui/meson.build index eba93b41e3..e9f48c5315 100644 --- a/ui/meson.build +++ b/ui/meson.build @@ -2,6 +2,7 @@ softmmu_ss.add(pixman) specific_ss.add(when: ['CONFIG_SOFTMMU'], if_true: pixman) # for the include path specific_ss.add(when: ['CONFIG_SOFTMMU'], if_true: opengl) # for the include path +softmmu_ss.add(png) softmmu_ss.add(files( 'clipboard.c', 'console.c', @@ -40,7 +41,7 @@ vnc_ss.add(files( 'vnc-jobs.c', 'vnc-clipboard.c', )) -vnc_ss.add(zlib, png, jpeg, gnutls) +vnc_ss.add(zlib, jpeg, gnutls) vnc_ss.add(when: sasl, if_true: files('vnc-auth-sasl.c')) softmmu_ss.add_all(when: vnc, if_true: vnc_ss) softmmu_ss.add(when: vnc, if_false: files('vnc-stubs.c')) diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c index 2669403839..9ad24ab1af 100644 --- a/util/qemu-coroutine-lock.c +++ b/util/qemu-coroutine-lock.c @@ -67,34 +67,6 @@ void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock) } } -static bool qemu_co_queue_do_restart(CoQueue *queue, bool single) -{ - Coroutine *next; - - if (QSIMPLEQ_EMPTY(&queue->entries)) { - return false; - } - - while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) { - QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next); - aio_co_wake(next); - if (single) { - break; - } - } - return true; -} - -bool qemu_co_queue_next(CoQueue *queue) -{ - return qemu_co_queue_do_restart(queue, true); -} - -void qemu_co_queue_restart_all(CoQueue *queue) -{ - qemu_co_queue_do_restart(queue, false); -} - bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock) { Coroutine *next; @@ -115,6 +87,25 @@ bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock) return true; } +bool coroutine_fn qemu_co_queue_next(CoQueue *queue) +{ + /* No unlock/lock needed in coroutine context. */ + return qemu_co_enter_next_impl(queue, NULL); +} + +void qemu_co_enter_all_impl(CoQueue *queue, QemuLockable *lock) +{ + while (qemu_co_enter_next_impl(queue, lock)) { + /* just loop */ + } +} + +void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue) +{ + /* No unlock/lock needed in coroutine context. */ + qemu_co_enter_all_impl(queue, NULL); +} + bool qemu_co_queue_empty(CoQueue *queue) { return QSIMPLEQ_FIRST(&queue->entries) == NULL; diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index f3e8300c8d..4a8bd63ef0 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -21,14 +21,20 @@ #include "qemu/coroutine-tls.h" #include "block/aio.h" -/** Initial batch size is 64, and is increased on demand */ +/** + * The minimal batch size is always 64, coroutines from the release_pool are + * reused as soon as there are 64 coroutines in it. The maximum pool size starts + * with 64 and is increased on demand so that coroutines are not deleted even if + * they are not immediately reused. + */ enum { - POOL_INITIAL_BATCH_SIZE = 64, + POOL_MIN_BATCH_SIZE = 64, + POOL_INITIAL_MAX_SIZE = 64, }; /** Free list to speed up creation */ static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); -static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; +static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; static unsigned int release_pool_size; typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; @@ -57,7 +63,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) co = QSLIST_FIRST(alloc_pool); if (!co) { - if (release_pool_size > qatomic_read(&pool_batch_size)) { + if (release_pool_size > POOL_MIN_BATCH_SIZE) { /* Slow path; a good place to register the destructor, too. */ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); if (!notifier->notify) { @@ -95,12 +101,12 @@ static void coroutine_delete(Coroutine *co) co->caller = NULL; if (CONFIG_COROUTINE_POOL) { - if (release_pool_size < qatomic_read(&pool_batch_size) * 2) { + if (release_pool_size < qatomic_read(&pool_max_size) * 2) { QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); qatomic_inc(&release_pool_size); return; } - if (get_alloc_pool_size() < qatomic_read(&pool_batch_size)) { + if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); set_alloc_pool_size(get_alloc_pool_size() + 1); return; @@ -212,12 +218,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) return co->ctx; } -void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size) +void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) { - qatomic_add(&pool_batch_size, additional_pool_size); + qatomic_add(&pool_max_size, additional_pool_size); } -void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size) +void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) { - qatomic_sub(&pool_batch_size, removing_pool_size); + qatomic_sub(&pool_max_size, removing_pool_size); } |