diff options
89 files changed, 4549 insertions, 1075 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index fd335a47bf..b68cb7e133 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -783,6 +783,11 @@ M: Samuel Thibault <samuel.thibault@ens-lyon.org> S: Maintained F: backends/baum.c +Coverity model +M: Markus Armbruster <armbru@redhat.com> +S: Supported +F: scripts/coverity-model.c + CPU M: Andreas Färber <afaerber@suse.de> S: Supported diff --git a/Makefile.objs b/Makefile.objs index abeb902b58..28999d39c4 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -51,6 +51,7 @@ common-obj-$(CONFIG_LINUX) += fsdev/ common-obj-y += migration/ common-obj-y += qemu-char.o #aio.o common-obj-y += page_cache.o +common-obj-y += qjson.o common-obj-$(CONFIG_SPICE) += spice-qemu-char.o @@ -36,6 +36,21 @@ static QEMUBalloonEvent *balloon_event_fn; static QEMUBalloonStatus *balloon_stat_fn; static void *balloon_opaque; +static bool have_ballon(Error **errp) +{ + if (kvm_enabled() && !kvm_has_sync_mmu()) { + error_set(errp, ERROR_CLASS_KVM_MISSING_CAP, + "Using KVM without synchronous MMU, balloon unavailable"); + return false; + } + if (!balloon_event_fn) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, + "No balloon device has been activated"); + return false; + } + return true; +} + int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, QEMUBalloonStatus *stat_func, void *opaque) { @@ -62,58 +77,30 @@ void qemu_remove_balloon_handler(void *opaque) balloon_opaque = NULL; } -static int qemu_balloon(ram_addr_t target) -{ - if (!balloon_event_fn) { - return 0; - } - trace_balloon_event(balloon_opaque, target); - balloon_event_fn(balloon_opaque, target); - return 1; -} - -static int qemu_balloon_status(BalloonInfo *info) -{ - if (!balloon_stat_fn) { - return 0; - } - balloon_stat_fn(balloon_opaque, info); - return 1; -} - BalloonInfo *qmp_query_balloon(Error **errp) { BalloonInfo *info; - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon"); + if (!have_ballon(errp)) { return NULL; } info 
= g_malloc0(sizeof(*info)); - - if (qemu_balloon_status(info) == 0) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon"); - qapi_free_BalloonInfo(info); - return NULL; - } - + balloon_stat_fn(balloon_opaque, info); return info; } -void qmp_balloon(int64_t value, Error **errp) +void qmp_balloon(int64_t target, Error **errp) { - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon"); + if (!have_ballon(errp)) { return; } - if (value <= 0) { + if (target <= 0) { error_set(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size"); return; } - - if (qemu_balloon(value) == 0) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon"); - } + + trace_balloon_event(balloon_opaque, target); + balloon_event_fn(balloon_opaque, target); } diff --git a/cpu-exec.c b/cpu-exec.c index a4f0effaf4..fa506e628a 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -61,8 +61,7 @@ static void align_clocks(SyncClocks *sc, const CPUState *cpu) sleep_delay.tv_sec = sc->diff_clk / 1000000000LL; sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL; if (nanosleep(&sleep_delay, &rem_delay) < 0) { - sc->diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL; - sc->diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec; + sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec; } else { sc->diff_clk = 0; } @@ -101,10 +100,8 @@ static void init_delay_params(SyncClocks *sc, if (!icount_align_option) { return; } - sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - - sc->realtime_clock + - cpu_get_clock_offset(); + sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); + sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock; sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; if (sc->diff_clk < max_delay) { max_delay = sc->diff_clk; @@ -229,23 +229,6 @@ int64_t cpu_get_clock(void) return ti; } -/* return the offset between the 
host clock and virtual CPU clock */ -int64_t cpu_get_clock_offset(void) -{ - int64_t ti; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - ti = timers_state.cpu_clock_offset; - if (!timers_state.cpu_ticks_enabled) { - ti -= get_clock(); - } - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return -ti; -} - /* enable cpu_get_ticks() * Caller must hold BQL which server as mutex for vm_clock_seqlock. */ diff --git a/disas/arm-a64.cc b/disas/arm-a64.cc index ca29f6f253..e04f946ca3 100644 --- a/disas/arm-a64.cc +++ b/disas/arm-a64.cc @@ -67,7 +67,8 @@ static void vixl_init(FILE *f) { int print_insn_arm_a64(uint64_t addr, disassemble_info *info) { uint8_t bytes[INSN_SIZE]; - uint32_t instr; + uint32_t instrval; + const Instruction *instr; int status; status = info->read_memory_func(addr, bytes, INSN_SIZE, info); @@ -80,8 +81,10 @@ int print_insn_arm_a64(uint64_t addr, disassemble_info *info) vixl_init(info->stream); } - instr = bytes[0] | bytes[1] << 8 | bytes[2] << 16 | bytes[3] << 24; - vixl_decoder->Decode(reinterpret_cast<Instruction*>(&instr)); + instrval = bytes[0] | bytes[1] << 8 | bytes[2] << 16 | bytes[3] << 24; + instr = reinterpret_cast<const Instruction *>(&instrval); + vixl_disasm->MapCodeAddress(addr, instr); + vixl_decoder->Decode(instr); return INSN_SIZE; } diff --git a/disas/libvixl/README b/disas/libvixl/README index cba31b458b..58db41c67c 100644 --- a/disas/libvixl/README +++ b/disas/libvixl/README @@ -2,7 +2,7 @@ The code in this directory is a subset of libvixl: https://github.com/armvixl/vixl (specifically, it is the set of files needed for disassembly only, -taken from libvixl 1.6). +taken from libvixl 1.7). Bugfixes should preferably be sent upstream initially. 
The disassembler does not currently support the entire A64 instruction diff --git a/disas/libvixl/a64/assembler-a64.h b/disas/libvixl/a64/assembler-a64.h index 16a704b7d4..35aaf20f72 100644 --- a/disas/libvixl/a64/assembler-a64.h +++ b/disas/libvixl/a64/assembler-a64.h @@ -151,21 +151,21 @@ class CPURegister { return Aliases(other) && (size_ == other.size_); } - inline bool IsZero() const { + bool IsZero() const { VIXL_ASSERT(IsValid()); return IsRegister() && (code_ == kZeroRegCode); } - inline bool IsSP() const { + bool IsSP() const { VIXL_ASSERT(IsValid()); return IsRegister() && (code_ == kSPRegInternalCode); } - inline bool IsRegister() const { + bool IsRegister() const { return type_ == kRegister; } - inline bool IsFPRegister() const { + bool IsFPRegister() const { return type_ == kFPRegister; } @@ -179,7 +179,7 @@ class CPURegister { const FPRegister& S() const; const FPRegister& D() const; - inline bool IsSameSizeAndType(const CPURegister& other) const { + bool IsSameSizeAndType(const CPURegister& other) const { return (size_ == other.size_) && (type_ == other.type_); } @@ -198,7 +198,7 @@ class CPURegister { class Register : public CPURegister { public: Register() : CPURegister() {} - inline explicit Register(const CPURegister& other) + explicit Register(const CPURegister& other) : CPURegister(other.code(), other.size(), other.type()) { VIXL_ASSERT(IsValidRegister()); } @@ -213,10 +213,6 @@ class Register : public CPURegister { static const Register& WRegFromCode(unsigned code); static const Register& XRegFromCode(unsigned code); - // V8 compatibility. 
- static const int kNumRegisters = kNumberOfRegisters; - static const int kNumAllocatableRegisters = kNumberOfRegisters - 1; - private: static const Register wregisters[]; static const Register xregisters[]; @@ -225,12 +221,12 @@ class Register : public CPURegister { class FPRegister : public CPURegister { public: - inline FPRegister() : CPURegister() {} - inline explicit FPRegister(const CPURegister& other) + FPRegister() : CPURegister() {} + explicit FPRegister(const CPURegister& other) : CPURegister(other.code(), other.size(), other.type()) { VIXL_ASSERT(IsValidFPRegister()); } - inline FPRegister(unsigned code, unsigned size) + FPRegister(unsigned code, unsigned size) : CPURegister(code, size, kFPRegister) {} bool IsValid() const { @@ -241,10 +237,6 @@ class FPRegister : public CPURegister { static const FPRegister& SRegFromCode(unsigned code); static const FPRegister& DRegFromCode(unsigned code); - // V8 compatibility. - static const int kNumRegisters = kNumberOfFPRegisters; - static const int kNumAllocatableRegisters = kNumberOfFPRegisters - 1; - private: static const FPRegister sregisters[]; static const FPRegister dregisters[]; @@ -312,23 +304,23 @@ bool AreSameSizeAndType(const CPURegister& reg1, // Lists of registers. 
class CPURegList { public: - inline explicit CPURegList(CPURegister reg1, - CPURegister reg2 = NoCPUReg, - CPURegister reg3 = NoCPUReg, - CPURegister reg4 = NoCPUReg) + explicit CPURegList(CPURegister reg1, + CPURegister reg2 = NoCPUReg, + CPURegister reg3 = NoCPUReg, + CPURegister reg4 = NoCPUReg) : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()), size_(reg1.size()), type_(reg1.type()) { VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4)); VIXL_ASSERT(IsValid()); } - inline CPURegList(CPURegister::RegisterType type, unsigned size, RegList list) + CPURegList(CPURegister::RegisterType type, unsigned size, RegList list) : list_(list), size_(size), type_(type) { VIXL_ASSERT(IsValid()); } - inline CPURegList(CPURegister::RegisterType type, unsigned size, - unsigned first_reg, unsigned last_reg) + CPURegList(CPURegister::RegisterType type, unsigned size, + unsigned first_reg, unsigned last_reg) : size_(size), type_(type) { VIXL_ASSERT(((type == CPURegister::kRegister) && (last_reg < kNumberOfRegisters)) || @@ -340,7 +332,7 @@ class CPURegList { VIXL_ASSERT(IsValid()); } - inline CPURegister::RegisterType type() const { + CPURegister::RegisterType type() const { VIXL_ASSERT(IsValid()); return type_; } @@ -366,13 +358,13 @@ class CPURegList { } // Variants of Combine and Remove which take a single register. - inline void Combine(const CPURegister& other) { + void Combine(const CPURegister& other) { VIXL_ASSERT(other.type() == type_); VIXL_ASSERT(other.size() == size_); Combine(other.code()); } - inline void Remove(const CPURegister& other) { + void Remove(const CPURegister& other) { VIXL_ASSERT(other.type() == type_); VIXL_ASSERT(other.size() == size_); Remove(other.code()); @@ -380,24 +372,51 @@ class CPURegList { // Variants of Combine and Remove which take a single register by its code; // the type and size of the register is inferred from this list. 
- inline void Combine(int code) { + void Combine(int code) { VIXL_ASSERT(IsValid()); VIXL_ASSERT(CPURegister(code, size_, type_).IsValid()); list_ |= (UINT64_C(1) << code); } - inline void Remove(int code) { + void Remove(int code) { VIXL_ASSERT(IsValid()); VIXL_ASSERT(CPURegister(code, size_, type_).IsValid()); list_ &= ~(UINT64_C(1) << code); } - inline RegList list() const { + static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) { + VIXL_ASSERT(list_1.type_ == list_2.type_); + VIXL_ASSERT(list_1.size_ == list_2.size_); + return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_); + } + static CPURegList Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3); + static CPURegList Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4); + + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2) { + VIXL_ASSERT(list_1.type_ == list_2.type_); + VIXL_ASSERT(list_1.size_ == list_2.size_); + return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_); + } + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3); + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4); + + RegList list() const { VIXL_ASSERT(IsValid()); return list_; } - inline void set_list(RegList new_list) { + void set_list(RegList new_list) { VIXL_ASSERT(IsValid()); list_ = new_list; } @@ -417,38 +436,38 @@ class CPURegList { static CPURegList GetCallerSaved(unsigned size = kXRegSize); static CPURegList GetCallerSavedFP(unsigned size = kDRegSize); - inline bool IsEmpty() const { + bool IsEmpty() const { VIXL_ASSERT(IsValid()); return list_ == 0; } - inline bool IncludesAliasOf(const CPURegister& other) const { + bool IncludesAliasOf(const CPURegister& other) const { 
VIXL_ASSERT(IsValid()); return (type_ == other.type()) && ((other.Bit() & list_) != 0); } - inline bool IncludesAliasOf(int code) const { + bool IncludesAliasOf(int code) const { VIXL_ASSERT(IsValid()); return ((code & list_) != 0); } - inline int Count() const { + int Count() const { VIXL_ASSERT(IsValid()); return CountSetBits(list_, kRegListSizeInBits); } - inline unsigned RegisterSizeInBits() const { + unsigned RegisterSizeInBits() const { VIXL_ASSERT(IsValid()); return size_; } - inline unsigned RegisterSizeInBytes() const { + unsigned RegisterSizeInBytes() const { int size_in_bits = RegisterSizeInBits(); VIXL_ASSERT((size_in_bits % 8) == 0); return size_in_bits / 8; } - inline unsigned TotalSizeInBytes() const { + unsigned TotalSizeInBytes() const { VIXL_ASSERT(IsValid()); return RegisterSizeInBytes() * Count(); } @@ -587,8 +606,10 @@ class Label { VIXL_ASSERT(!IsLinked() || IsBound()); } - inline bool IsBound() const { return location_ >= 0; } - inline bool IsLinked() const { return !links_.empty(); } + bool IsBound() const { return location_ >= 0; } + bool IsLinked() const { return !links_.empty(); } + + ptrdiff_t location() const { return location_; } private: // The list of linked instructions is stored in a stack-like structure. We @@ -647,22 +668,20 @@ class Label { std::stack<ptrdiff_t> * links_extended_; }; - inline ptrdiff_t location() const { return location_; } - - inline void Bind(ptrdiff_t location) { + void Bind(ptrdiff_t location) { // Labels can only be bound once. VIXL_ASSERT(!IsBound()); location_ = location; } - inline void AddLink(ptrdiff_t instruction) { + void AddLink(ptrdiff_t instruction) { // If a label is bound, the assembler already has the information it needs // to write the instruction, so there is no need to add it to links_. 
VIXL_ASSERT(!IsBound()); links_.push(instruction); } - inline ptrdiff_t GetAndRemoveNextLink() { + ptrdiff_t GetAndRemoveNextLink() { VIXL_ASSERT(IsLinked()); ptrdiff_t link = links_.top(); links_.pop(); @@ -845,14 +864,14 @@ class Assembler { // Return the address of an offset in the buffer. template <typename T> - inline T GetOffsetAddress(ptrdiff_t offset) { + T GetOffsetAddress(ptrdiff_t offset) { VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); return buffer_->GetOffsetAddress<T>(offset); } // Return the address of a bound label. template <typename T> - inline T GetLabelAddress(const Label * label) { + T GetLabelAddress(const Label * label) { VIXL_ASSERT(label->IsBound()); VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); return GetOffsetAddress<T>(label->location()); @@ -860,14 +879,14 @@ class Assembler { // Return the address of the cursor. template <typename T> - inline T GetCursorAddress() { + T GetCursorAddress() { VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); return GetOffsetAddress<T>(CursorOffset()); } // Return the address of the start of the buffer. template <typename T> - inline T GetStartAddress() { + T GetStartAddress() { VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); return GetOffsetAddress<T>(0); } @@ -1074,20 +1093,20 @@ class Assembler { // Bfm aliases. // Bitfield insert. - inline void bfi(const Register& rd, - const Register& rn, - unsigned lsb, - unsigned width) { + void bfi(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { VIXL_ASSERT(width >= 1); VIXL_ASSERT(lsb + width <= rn.size()); bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1); } // Bitfield extract and insert low. 
- inline void bfxil(const Register& rd, - const Register& rn, - unsigned lsb, - unsigned width) { + void bfxil(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { VIXL_ASSERT(width >= 1); VIXL_ASSERT(lsb + width <= rn.size()); bfm(rd, rn, lsb, lsb + width - 1); @@ -1095,92 +1114,92 @@ class Assembler { // Sbfm aliases. // Arithmetic shift right. - inline void asr(const Register& rd, const Register& rn, unsigned shift) { + void asr(const Register& rd, const Register& rn, unsigned shift) { VIXL_ASSERT(shift < rd.size()); sbfm(rd, rn, shift, rd.size() - 1); } // Signed bitfield insert with zero at right. - inline void sbfiz(const Register& rd, - const Register& rn, - unsigned lsb, - unsigned width) { + void sbfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { VIXL_ASSERT(width >= 1); VIXL_ASSERT(lsb + width <= rn.size()); sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1); } // Signed bitfield extract. - inline void sbfx(const Register& rd, - const Register& rn, - unsigned lsb, - unsigned width) { + void sbfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { VIXL_ASSERT(width >= 1); VIXL_ASSERT(lsb + width <= rn.size()); sbfm(rd, rn, lsb, lsb + width - 1); } // Signed extend byte. - inline void sxtb(const Register& rd, const Register& rn) { + void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); } // Signed extend halfword. - inline void sxth(const Register& rd, const Register& rn) { + void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); } // Signed extend word. - inline void sxtw(const Register& rd, const Register& rn) { + void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); } // Ubfm aliases. // Logical shift left. 
- inline void lsl(const Register& rd, const Register& rn, unsigned shift) { + void lsl(const Register& rd, const Register& rn, unsigned shift) { unsigned reg_size = rd.size(); VIXL_ASSERT(shift < reg_size); ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1); } // Logical shift right. - inline void lsr(const Register& rd, const Register& rn, unsigned shift) { + void lsr(const Register& rd, const Register& rn, unsigned shift) { VIXL_ASSERT(shift < rd.size()); ubfm(rd, rn, shift, rd.size() - 1); } // Unsigned bitfield insert with zero at right. - inline void ubfiz(const Register& rd, - const Register& rn, - unsigned lsb, - unsigned width) { + void ubfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { VIXL_ASSERT(width >= 1); VIXL_ASSERT(lsb + width <= rn.size()); ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1); } // Unsigned bitfield extract. - inline void ubfx(const Register& rd, - const Register& rn, - unsigned lsb, - unsigned width) { + void ubfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { VIXL_ASSERT(width >= 1); VIXL_ASSERT(lsb + width <= rn.size()); ubfm(rd, rn, lsb, lsb + width - 1); } // Unsigned extend byte. - inline void uxtb(const Register& rd, const Register& rn) { + void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); } // Unsigned extend halfword. - inline void uxth(const Register& rd, const Register& rn) { + void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); } // Unsigned extend word. - inline void uxtw(const Register& rd, const Register& rn) { + void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); } @@ -1230,7 +1249,7 @@ class Assembler { void cneg(const Register& rd, const Register& rn, Condition cond); // Rotate right. 
- inline void ror(const Register& rd, const Register& rs, unsigned shift) { + void ror(const Register& rd, const Register& rs, unsigned shift) { extr(rd, rs, rs, shift); } @@ -1495,6 +1514,19 @@ class Assembler { // Load-acquire register. void ldar(const Register& rt, const MemOperand& src); + // Prefetch memory. + void prfm(PrefetchOperation op, const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + // Prefetch memory (with unscaled offset). + void prfum(PrefetchOperation op, const MemOperand& addr, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Prefetch memory in the literal pool. + void prfm(PrefetchOperation op, RawLiteral* literal); + + // Prefetch from pc + imm19 << 2. + void prfm(PrefetchOperation op, int imm19); // Move instructions. The default shift of -1 indicates that the move // instruction will calculate an appropriate 16-bit immediate and left shift @@ -1638,12 +1670,21 @@ class Assembler { // FP round to integer (nearest with ties to away). void frinta(const FPRegister& fd, const FPRegister& fn); + // FP round to integer (implicit rounding). + void frinti(const FPRegister& fd, const FPRegister& fn); + // FP round to integer (toward minus infinity). void frintm(const FPRegister& fd, const FPRegister& fn); // FP round to integer (nearest with ties to even). void frintn(const FPRegister& fd, const FPRegister& fn); + // FP round to integer (toward plus infinity). + void frintp(const FPRegister& fd, const FPRegister& fn); + + // FP round to integer (exact, implicit rounding). + void frintx(const FPRegister& fd, const FPRegister& fn); + // FP round to integer (towards zero). void frintz(const FPRegister& fd, const FPRegister& fn); @@ -1705,16 +1746,16 @@ class Assembler { // Emit generic instructions. // Emit raw instructions into the instruction stream. - inline void dci(Instr raw_inst) { Emit(raw_inst); } + void dci(Instr raw_inst) { Emit(raw_inst); } // Emit 32 bits of data into the instruction stream. 
- inline void dc32(uint32_t data) { + void dc32(uint32_t data) { VIXL_ASSERT(buffer_monitor_ > 0); buffer_->Emit32(data); } // Emit 64 bits of data into the instruction stream. - inline void dc64(uint64_t data) { + void dc64(uint64_t data) { VIXL_ASSERT(buffer_monitor_ > 0); buffer_->Emit64(data); } @@ -1849,14 +1890,14 @@ class Assembler { } } - static inline Instr ImmS(unsigned imms, unsigned reg_size) { + static Instr ImmS(unsigned imms, unsigned reg_size) { VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) || ((reg_size == kWRegSize) && is_uint5(imms))); USE(reg_size); return imms << ImmS_offset; } - static inline Instr ImmR(unsigned immr, unsigned reg_size) { + static Instr ImmR(unsigned immr, unsigned reg_size) { VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) || ((reg_size == kWRegSize) && is_uint5(immr))); USE(reg_size); @@ -1864,7 +1905,7 @@ class Assembler { return immr << ImmR_offset; } - static inline Instr ImmSetBits(unsigned imms, unsigned reg_size) { + static Instr ImmSetBits(unsigned imms, unsigned reg_size) { VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); VIXL_ASSERT(is_uint6(imms)); VIXL_ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3)); @@ -1872,7 +1913,7 @@ class Assembler { return imms << ImmSetBits_offset; } - static inline Instr ImmRotate(unsigned immr, unsigned reg_size) { + static Instr ImmRotate(unsigned immr, unsigned reg_size) { VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) || ((reg_size == kWRegSize) && is_uint5(immr))); @@ -1880,12 +1921,12 @@ class Assembler { return immr << ImmRotate_offset; } - static inline Instr ImmLLiteral(int imm19) { + static Instr ImmLLiteral(int imm19) { VIXL_ASSERT(is_int19(imm19)); return truncate_to_int19(imm19) << ImmLLiteral_offset; } - static inline Instr BitN(unsigned bitn, unsigned reg_size) { + static Instr BitN(unsigned bitn, unsigned reg_size) { VIXL_ASSERT((reg_size == kWRegSize) || 
(reg_size == kXRegSize)); VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0)); USE(reg_size); @@ -1943,6 +1984,11 @@ class Assembler { return shift_amount << ImmShiftLS_offset; } + static Instr ImmPrefetchOperation(int imm5) { + VIXL_ASSERT(is_uint5(imm5)); + return imm5 << ImmPrefetchOperation_offset; + } + static Instr ImmException(int imm16) { VIXL_ASSERT(is_uint16(imm16)); return imm16 << ImmException_offset; @@ -2003,12 +2049,32 @@ class Assembler { return scale << FPScale_offset; } + // Immediate field checking helpers. + static bool IsImmAddSub(int64_t immediate); + static bool IsImmConditionalCompare(int64_t immediate); + static bool IsImmFP32(float imm); + static bool IsImmFP64(double imm); + static bool IsImmLogical(uint64_t value, + unsigned width, + unsigned* n = NULL, + unsigned* imm_s = NULL, + unsigned* imm_r = NULL); + static bool IsImmLSPair(int64_t offset, LSDataSize size); + static bool IsImmLSScaled(int64_t offset, LSDataSize size); + static bool IsImmLSUnscaled(int64_t offset); + static bool IsImmMovn(uint64_t imm, unsigned reg_size); + static bool IsImmMovz(uint64_t imm, unsigned reg_size); + // Size of the code generated since label to the current position. 
size_t SizeOfCodeGeneratedSince(Label* label) const { VIXL_ASSERT(label->IsBound()); return buffer_->OffsetFrom(label->location()); } + size_t SizeOfCodeGenerated() const { + return buffer_->CursorOffset(); + } + size_t BufferCapacity() const { return buffer_->capacity(); } size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); } @@ -2025,7 +2091,7 @@ class Assembler { } } -#ifdef DEBUG +#ifdef VIXL_DEBUG void AcquireBuffer() { VIXL_ASSERT(buffer_monitor_ >= 0); buffer_monitor_++; @@ -2037,16 +2103,16 @@ class Assembler { } #endif - inline PositionIndependentCodeOption pic() { + PositionIndependentCodeOption pic() const { return pic_; } - inline bool AllowPageOffsetDependentCode() { + bool AllowPageOffsetDependentCode() const { return (pic() == PageOffsetDependentCode) || (pic() == PositionDependentCode); } - static inline const Register& AppropriateZeroRegFor(const CPURegister& reg) { + static const Register& AppropriateZeroRegFor(const CPURegister& reg) { return reg.Is64Bits() ? xzr : wzr; } @@ -2056,14 +2122,15 @@ class Assembler { const MemOperand& addr, LoadStoreOp op, LoadStoreScalingOption option = PreferScaledOffset); - static bool IsImmLSUnscaled(int64_t offset); - static bool IsImmLSScaled(int64_t offset, LSDataSize size); void LoadStorePair(const CPURegister& rt, const CPURegister& rt2, const MemOperand& addr, LoadStorePairOp op); - static bool IsImmLSPair(int64_t offset, LSDataSize size); + + void Prefetch(PrefetchOperation op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); // TODO(all): The third parameter should be passed by reference but gcc 4.8.2 // reports a bogus uninitialised warning then. 
@@ -2077,18 +2144,12 @@ class Assembler { unsigned imm_s, unsigned imm_r, LogicalOp op); - static bool IsImmLogical(uint64_t value, - unsigned width, - unsigned* n = NULL, - unsigned* imm_s = NULL, - unsigned* imm_r = NULL); void ConditionalCompare(const Register& rn, const Operand& operand, StatusFlags nzcv, Condition cond, ConditionalCompareOp op); - static bool IsImmConditionalCompare(int64_t immediate); void AddSubWithCarry(const Register& rd, const Register& rn, @@ -2096,8 +2157,6 @@ class Assembler { FlagsUpdate S, AddSubWithCarryOp op); - static bool IsImmFP32(float imm); - static bool IsImmFP64(double imm); // Functions for emulating operands not directly supported by the instruction // set. @@ -2115,7 +2174,6 @@ class Assembler { const Operand& operand, FlagsUpdate S, AddSubOp op); - static bool IsImmAddSub(int64_t immediate); // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified // registers. Only simple loads are supported; sign- and zero-extension (such @@ -2180,6 +2238,12 @@ class Assembler { const FPRegister& fa, FPDataProcessing3SourceOp op); + // Encode the specified MemOperand for the specified access size and scaling + // preference. + Instr LoadStoreMemOperand(const MemOperand& addr, + LSDataSize size, + LoadStoreScalingOption option); + // Link the current (not-yet-emitted) instruction to the specified label, then // return an offset to be encoded in the instruction. If the label is not yet // bound, an offset of 0 is returned. 
@@ -2205,7 +2269,7 @@ class Assembler { CodeBuffer* buffer_; PositionIndependentCodeOption pic_; -#ifdef DEBUG +#ifdef VIXL_DEBUG int64_t buffer_monitor_; #endif }; @@ -2239,7 +2303,7 @@ class CodeBufferCheckScope { AssertPolicy assert_policy = kMaximumSize) : assm_(assm) { if (check_policy == kCheck) assm->EnsureSpaceFor(size); -#ifdef DEBUG +#ifdef VIXL_DEBUG assm->bind(&start_); size_ = size; assert_policy_ = assert_policy; @@ -2251,7 +2315,7 @@ class CodeBufferCheckScope { // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert). explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) { -#ifdef DEBUG +#ifdef VIXL_DEBUG size_ = 0; assert_policy_ = kNoAssert; assm->AcquireBuffer(); @@ -2259,7 +2323,7 @@ class CodeBufferCheckScope { } ~CodeBufferCheckScope() { -#ifdef DEBUG +#ifdef VIXL_DEBUG assm_->ReleaseBuffer(); switch (assert_policy_) { case kNoAssert: break; @@ -2277,7 +2341,7 @@ class CodeBufferCheckScope { protected: Assembler* assm_; -#ifdef DEBUG +#ifdef VIXL_DEBUG Label start_; size_t size_; AssertPolicy assert_policy_; diff --git a/disas/libvixl/a64/constants-a64.h b/disas/libvixl/a64/constants-a64.h index 7a14f85f59..bc1a2c4b9b 100644 --- a/disas/libvixl/a64/constants-a64.h +++ b/disas/libvixl/a64/constants-a64.h @@ -31,12 +31,6 @@ namespace vixl { const unsigned kNumberOfRegisters = 32; const unsigned kNumberOfFPRegisters = 32; -// Callee saved registers are x21-x30(lr). -const int kNumberOfCalleeSavedRegisters = 10; -const int kFirstCalleeSavedRegisterIndex = 21; -// Callee saved FP registers are d8-d15. -const int kNumberOfCalleeSavedFPRegisters = 8; -const int kFirstCalleeSavedFPRegisterIndex = 8; #define REGISTER_CODE_LIST(R) \ R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ @@ -53,7 +47,6 @@ V_(Ra, 14, 10, Bits) /* Third source register. */ \ V_(Rt, 4, 0, Bits) /* Load/store register. */ \ V_(Rt2, 14, 10, Bits) /* Load/store second register. */ \ V_(Rs, 20, 16, Bits) /* Exclusive access status. 
*/ \ -V_(PrefetchMode, 4, 0, Bits) \ \ /* Common bits */ \ V_(SixtyFourBits, 31, 31, Bits) \ @@ -109,6 +102,10 @@ V_(ImmLSUnsigned, 21, 10, Bits) \ V_(ImmLSPair, 21, 15, SignedBits) \ V_(SizeLS, 31, 30, Bits) \ V_(ImmShiftLS, 12, 12, Bits) \ +V_(ImmPrefetchOperation, 4, 0, Bits) \ +V_(PrefetchHint, 4, 3, Bits) \ +V_(PrefetchTarget, 2, 1, Bits) \ +V_(PrefetchStream, 0, 0, Bits) \ \ /* Other immediates */ \ V_(ImmUncondBranch, 25, 0, SignedBits) \ @@ -269,6 +266,29 @@ enum BarrierType { BarrierAll = 3 }; +enum PrefetchOperation { + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + + PLIL1KEEP = 0x08, + PLIL1STRM = 0x09, + PLIL2KEEP = 0x0a, + PLIL2STRM = 0x0b, + PLIL3KEEP = 0x0c, + PLIL3STRM = 0x0d, + + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15 +}; + // System/special register names. // This information is not encoded as one field but as the concatenation of // multiple fields (Op0<0>, Op1, Crn, Crm, Op2). @@ -605,6 +625,12 @@ enum LoadStoreAnyOp { LoadStoreAnyFixed = 0x08000000 }; +// Any load pair or store pair. +enum LoadStorePairAnyOp { + LoadStorePairAnyFMask = 0x3a000000, + LoadStorePairAnyFixed = 0x28000000 +}; + #define LOAD_STORE_PAIR_OP_LIST(V) \ V(STP, w, 0x00000000), \ V(LDP, w, 0x00400000), \ @@ -703,27 +729,28 @@ enum LoadLiteralOp { V(LD, R, d, 0xC4400000) +// Load/store (post, pre, offset and unsigned.) +enum LoadStoreOp { + LoadStoreOpMask = 0xC4C00000, + #define LOAD_STORE(A, B, C, D) \ + A##B##_##C = D + LOAD_STORE_OP_LIST(LOAD_STORE), + #undef LOAD_STORE + PRFM = 0xC0800000 +}; + // Load/store unscaled offset. 
enum LoadStoreUnscaledOffsetOp { LoadStoreUnscaledOffsetFixed = 0x38000000, LoadStoreUnscaledOffsetFMask = 0x3B200C00, LoadStoreUnscaledOffsetMask = 0xFFE00C00, + PRFUM = LoadStoreUnscaledOffsetFixed | PRFM, #define LOAD_STORE_UNSCALED(A, B, C, D) \ A##U##B##_##C = LoadStoreUnscaledOffsetFixed | D LOAD_STORE_OP_LIST(LOAD_STORE_UNSCALED) #undef LOAD_STORE_UNSCALED }; -// Load/store (post, pre, offset and unsigned.) -enum LoadStoreOp { - LoadStoreOpMask = 0xC4C00000, - #define LOAD_STORE(A, B, C, D) \ - A##B##_##C = D - LOAD_STORE_OP_LIST(LOAD_STORE), - #undef LOAD_STORE - PRFM = 0xC0800000 -}; - // Load/store post index. enum LoadStorePostIndex { LoadStorePostIndexFixed = 0x38000400, diff --git a/disas/libvixl/a64/decoder-a64.h b/disas/libvixl/a64/decoder-a64.h index 172594c89b..fd08d6c1f4 100644 --- a/disas/libvixl/a64/decoder-a64.h +++ b/disas/libvixl/a64/decoder-a64.h @@ -108,7 +108,7 @@ class DecoderVisitor { } private: - VisitorConstness constness_; + const VisitorConstness constness_; }; diff --git a/disas/libvixl/a64/disasm-a64.cc b/disas/libvixl/a64/disasm-a64.cc index e4a74aa57c..f7bc2468bb 100644 --- a/disas/libvixl/a64/disasm-a64.cc +++ b/disas/libvixl/a64/disasm-a64.cc @@ -34,6 +34,7 @@ Disassembler::Disassembler() { buffer_ = reinterpret_cast<char*>(malloc(buffer_size_)); buffer_pos_ = 0; own_buffer_ = true; + code_address_offset_ = 0; } @@ -42,6 +43,7 @@ Disassembler::Disassembler(char* text_buffer, int buffer_size) { buffer_ = text_buffer; buffer_pos_ = 0; own_buffer_ = false; + code_address_offset_ = 0; } @@ -739,9 +741,25 @@ void Disassembler::VisitMoveWideImmediate(const Instruction* instr) { // shift calculation. 
switch (instr->Mask(MoveWideImmediateMask)) { case MOVN_w: - case MOVN_x: mnemonic = "movn"; break; + case MOVN_x: + if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0)) { + if ((instr->SixtyFourBits() == 0) && (instr->ImmMoveWide() == 0xffff)) { + mnemonic = "movn"; + } else { + mnemonic = "mov"; + form = "'Rd, 'IMoveNeg"; + } + } else { + mnemonic = "movn"; + } + break; case MOVZ_w: - case MOVZ_x: mnemonic = "movz"; break; + case MOVZ_x: + if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0)) + mnemonic = "mov"; + else + mnemonic = "movz"; + break; case MOVK_w: case MOVK_x: mnemonic = "movk"; form = "'Rd, 'IMoveLSL"; break; default: VIXL_UNREACHABLE(); @@ -806,7 +824,7 @@ void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) { case A##_unsigned: mnemonic = B; form = C ", ['Xns'ILU]"; break; LOAD_STORE_LIST(LS_UNSIGNEDOFFSET) #undef LS_UNSIGNEDOFFSET - case PRFM_unsigned: mnemonic = "prfm"; form = "'PrefOp, ['Xn'ILU]"; + case PRFM_unsigned: mnemonic = "prfm"; form = "'PrefOp, ['Xns'ILU]"; } Format(instr, mnemonic, form); } @@ -833,6 +851,7 @@ void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) { const char *form_x = "'Xt, ['Xns'ILS]"; const char *form_s = "'St, ['Xns'ILS]"; const char *form_d = "'Dt, ['Xns'ILS]"; + const char *form_prefetch = "'PrefOp, ['Xns'ILS]"; switch (instr->Mask(LoadStoreUnscaledOffsetMask)) { case STURB_w: mnemonic = "sturb"; break; @@ -852,6 +871,7 @@ void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) { case LDURSH_x: form = form_x; // Fall through. 
case LDURSH_w: mnemonic = "ldursh"; break; case LDURSW_x: mnemonic = "ldursw"; form = form_x; break; + case PRFUM: mnemonic = "prfum"; form = form_prefetch; break; default: form = "(LoadStoreUnscaledOffset)"; } Format(instr, mnemonic, form); @@ -872,6 +892,11 @@ void Disassembler::VisitLoadLiteral(const Instruction* instr) { form = "'Xt, 'ILLiteral 'LValue"; break; } + case PRFM_lit: { + mnemonic = "prfm"; + form = "'PrefOp, 'ILLiteral 'LValue"; + break; + } default: mnemonic = "unimplemented"; } Format(instr, mnemonic, form); @@ -1344,7 +1369,7 @@ void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction* instr, void Disassembler::AppendAddressToOutput(const Instruction* instr, const void* addr) { USE(instr); - AppendToOutput("(addr %p)", addr); + AppendToOutput("(addr 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(addr)); } @@ -1360,6 +1385,40 @@ void Disassembler::AppendDataAddressToOutput(const Instruction* instr, } +void Disassembler::AppendCodeRelativeAddressToOutput(const Instruction* instr, + const void* addr) { + USE(instr); + int64_t rel_addr = CodeRelativeAddress(addr); + if (rel_addr >= 0) { + AppendToOutput("(addr 0x%" PRIx64 ")", rel_addr); + } else { + AppendToOutput("(addr -0x%" PRIx64 ")", -rel_addr); + } +} + + +void Disassembler::AppendCodeRelativeCodeAddressToOutput( + const Instruction* instr, const void* addr) { + AppendCodeRelativeAddressToOutput(instr, addr); +} + + +void Disassembler::AppendCodeRelativeDataAddressToOutput( + const Instruction* instr, const void* addr) { + AppendCodeRelativeAddressToOutput(instr, addr); +} + + +void Disassembler::MapCodeAddress(int64_t base_address, + const Instruction* instr_address) { + set_code_address_offset( + base_address - reinterpret_cast<intptr_t>(instr_address)); +} +int64_t Disassembler::CodeRelativeAddress(const void* addr) { + return reinterpret_cast<intptr_t>(addr) + code_address_offset(); +} + + void Disassembler::Format(const Instruction* instr, const char* mnemonic, const char* 
format) { VIXL_ASSERT(mnemonic != NULL); @@ -1486,16 +1545,20 @@ int Disassembler::SubstituteImmediateField(const Instruction* instr, VIXL_ASSERT(format[0] == 'I'); switch (format[1]) { - case 'M': { // IMoveImm or IMoveLSL. - if (format[5] == 'I') { - uint64_t imm = instr->ImmMoveWide() << (16 * instr->ShiftMoveWide()); - AppendToOutput("#0x%" PRIx64, imm); - } else { - VIXL_ASSERT(format[5] == 'L'); + case 'M': { // IMoveImm, IMoveNeg or IMoveLSL. + if (format[5] == 'L') { AppendToOutput("#0x%" PRIx64, instr->ImmMoveWide()); if (instr->ShiftMoveWide() > 0) { AppendToOutput(", lsl #%" PRId64, 16 * instr->ShiftMoveWide()); } + } else { + VIXL_ASSERT((format[5] == 'I') || (format[5] == 'N')); + uint64_t imm = instr->ImmMoveWide() << (16 * instr->ShiftMoveWide()); + if (format[5] == 'N') + imm = ~imm; + if (!instr->SixtyFourBits()) + imm &= UINT64_C(0xffffffff); + AppendToOutput("#0x%" PRIx64, imm); } return 8; } @@ -1634,14 +1697,31 @@ int Disassembler::SubstituteLiteralField(const Instruction* instr, VIXL_ASSERT(strncmp(format, "LValue", 6) == 0); USE(format); + const void * address = instr->LiteralAddress<const void *>(); switch (instr->Mask(LoadLiteralMask)) { case LDR_w_lit: case LDR_x_lit: case LDRSW_x_lit: case LDR_s_lit: case LDR_d_lit: - AppendDataAddressToOutput(instr, instr->LiteralAddress()); + AppendCodeRelativeDataAddressToOutput(instr, address); break; + case PRFM_lit: { + // Use the prefetch hint to decide how to print the address. + switch (instr->PrefetchHint()) { + case 0x0: // PLD: prefetch for load. + case 0x2: // PST: prepare for store. + AppendCodeRelativeDataAddressToOutput(instr, address); + break; + case 0x1: // PLI: preload instructions. + AppendCodeRelativeCodeAddressToOutput(instr, address); + break; + case 0x3: // Unallocated hint. 
+ AppendCodeRelativeAddressToOutput(instr, address); + break; + } + break; + } default: VIXL_UNREACHABLE(); } @@ -1701,17 +1781,22 @@ int Disassembler::SubstitutePCRelAddressField(const Instruction* instr, (strcmp(format, "AddrPCRelPage") == 0)); // Used by `adrp`. int64_t offset = instr->ImmPCRel(); - const Instruction * base = instr; + // Compute the target address based on the effective address (after applying + // code_address_offset). This is required for correct behaviour of adrp. + const Instruction* base = instr + code_address_offset(); if (format[9] == 'P') { offset *= kPageSize; base = AlignDown(base, kPageSize); } + // Strip code_address_offset before printing, so we can use the + // semantically-correct AppendCodeRelativeAddressToOutput. + const void* target = + reinterpret_cast<const void*>(base + offset - code_address_offset()); - const void* target = reinterpret_cast<const void*>(base + offset); AppendPCRelativeOffsetToOutput(instr, offset); AppendToOutput(" "); - AppendAddressToOutput(instr, target); + AppendCodeRelativeAddressToOutput(instr, target); return 13; } @@ -1738,7 +1823,7 @@ int Disassembler::SubstituteBranchTargetField(const Instruction* instr, AppendPCRelativeOffsetToOutput(instr, offset); AppendToOutput(" "); - AppendCodeAddressToOutput(instr, target_address); + AppendCodeRelativeCodeAddressToOutput(instr, target_address); return 8; } @@ -1805,13 +1890,26 @@ int Disassembler::SubstitutePrefetchField(const Instruction* instr, VIXL_ASSERT(format[0] == 'P'); USE(format); - int prefetch_mode = instr->PrefetchMode(); - - const char* ls = (prefetch_mode & 0x10) ? "st" : "ld"; - int level = (prefetch_mode >> 1) + 1; - const char* ks = (prefetch_mode & 1) ? 
"strm" : "keep"; - - AppendToOutput("p%sl%d%s", ls, level, ks); + static const char* hints[] = {"ld", "li", "st"}; + static const char* stream_options[] = {"keep", "strm"}; + + unsigned hint = instr->PrefetchHint(); + unsigned target = instr->PrefetchTarget() + 1; + unsigned stream = instr->PrefetchStream(); + + if ((hint >= (sizeof(hints) / sizeof(hints[0]))) || (target > 3)) { + // Unallocated prefetch operations. + int prefetch_mode = instr->ImmPrefetchOperation(); + AppendToOutput("#0b%c%c%c%c%c", + (prefetch_mode & (1 << 4)) ? '1' : '0', + (prefetch_mode & (1 << 3)) ? '1' : '0', + (prefetch_mode & (1 << 2)) ? '1' : '0', + (prefetch_mode & (1 << 1)) ? '1' : '0', + (prefetch_mode & (1 << 0)) ? '1' : '0'); + } else { + VIXL_ASSERT(stream < (sizeof(stream_options) / sizeof(stream_options[0]))); + AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]); + } return 6; } diff --git a/disas/libvixl/a64/disasm-a64.h b/disas/libvixl/a64/disasm-a64.h index db043375c5..ddfe98be19 100644 --- a/disas/libvixl/a64/disasm-a64.h +++ b/disas/libvixl/a64/disasm-a64.h @@ -43,7 +43,7 @@ class Disassembler: public DecoderVisitor { char* GetOutput(); // Declare all Visitor functions. - #define DECLARE(A) void Visit##A(const Instruction* instr); + #define DECLARE(A) virtual void Visit##A(const Instruction* instr); VISITOR_LIST(DECLARE) #undef DECLARE @@ -65,23 +65,45 @@ class Disassembler: public DecoderVisitor { // Prints an address, in the general case. It can be code or data. This is // used for example to print the target address of an ADR instruction. - virtual void AppendAddressToOutput(const Instruction* instr, - const void* addr); + virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr, + const void* addr); // Prints the address of some code. // This is used for example to print the target address of a branch to an // immediate offset. // A sub-class can for example override this method to lookup the address and // print an appropriate name. 
- virtual void AppendCodeAddressToOutput(const Instruction* instr, - const void* addr); + virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr, + const void* addr); // Prints the address of some data. // This is used for example to print the source address of a load literal // instruction. + virtual void AppendCodeRelativeDataAddressToOutput(const Instruction* instr, + const void* addr); + + // Same as the above, but for addresses that are not relative to the code + // buffer. They are currently not used by VIXL. + virtual void AppendAddressToOutput(const Instruction* instr, + const void* addr); + virtual void AppendCodeAddressToOutput(const Instruction* instr, + const void* addr); virtual void AppendDataAddressToOutput(const Instruction* instr, const void* addr); + public: + // Get/Set the offset that should be added to code addresses when printing + // code-relative addresses in the AppendCodeRelative<Type>AddressToOutput() + // helpers. + // Below is an example of how a branch immediate instruction in memory at + // address 0xb010200 would disassemble with different offsets. 
+ // Base address | Disassembly + // 0x0 | 0xb010200: b #+0xcc (addr 0xb0102cc) + // 0x10000 | 0xb000200: b #+0xcc (addr 0xb0002cc) + // 0xb010200 | 0x0: b #+0xcc (addr 0xcc) + void MapCodeAddress(int64_t base_address, const Instruction* instr_address); + int64_t CodeRelativeAddress(const void* instr); + private: void Format( const Instruction* instr, const char* mnemonic, const char* format); @@ -101,32 +123,40 @@ class Disassembler: public DecoderVisitor { int SubstitutePrefetchField(const Instruction* instr, const char* format); int SubstituteBarrierField(const Instruction* instr, const char* format); - inline bool RdIsZROrSP(const Instruction* instr) const { + bool RdIsZROrSP(const Instruction* instr) const { return (instr->Rd() == kZeroRegCode); } - inline bool RnIsZROrSP(const Instruction* instr) const { + bool RnIsZROrSP(const Instruction* instr) const { return (instr->Rn() == kZeroRegCode); } - inline bool RmIsZROrSP(const Instruction* instr) const { + bool RmIsZROrSP(const Instruction* instr) const { return (instr->Rm() == kZeroRegCode); } - inline bool RaIsZROrSP(const Instruction* instr) const { + bool RaIsZROrSP(const Instruction* instr) const { return (instr->Ra() == kZeroRegCode); } bool IsMovzMovnImm(unsigned reg_size, uint64_t value); + int64_t code_address_offset() const { return code_address_offset_; } + protected: void ResetOutput(); void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3); + void set_code_address_offset(int64_t code_address_offset) { + code_address_offset_ = code_address_offset; + } + char* buffer_; uint32_t buffer_pos_; uint32_t buffer_size_; bool own_buffer_; + + int64_t code_address_offset_; }; diff --git a/disas/libvixl/a64/instructions-a64.cc b/disas/libvixl/a64/instructions-a64.cc index 1f08c781eb..b091886838 100644 --- a/disas/libvixl/a64/instructions-a64.cc +++ b/disas/libvixl/a64/instructions-a64.cc @@ -30,6 +30,20 @@ namespace vixl { +// Floating-point infinity values. 
+const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000); +const float kFP32NegativeInfinity = rawbits_to_float(0xff800000); +const double kFP64PositiveInfinity = + rawbits_to_double(UINT64_C(0x7ff0000000000000)); +const double kFP64NegativeInfinity = + rawbits_to_double(UINT64_C(0xfff0000000000000)); + + +// The default NaN values (for FPCR.DN=1). +const double kFP64DefaultNaN = rawbits_to_double(UINT64_C(0x7ff8000000000000)); +const float kFP32DefaultNaN = rawbits_to_float(0x7fc00000); + + static uint64_t RotateRight(uint64_t value, unsigned int rotate, unsigned int width) { @@ -54,6 +68,55 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, } +bool Instruction::IsLoad() const { + if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { + return false; + } + + if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) { + return Mask(LoadStorePairLBit) != 0; + } else { + LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreOpMask)); + switch (op) { + case LDRB_w: + case LDRH_w: + case LDR_w: + case LDR_x: + case LDRSB_w: + case LDRSB_x: + case LDRSH_w: + case LDRSH_x: + case LDRSW_x: + case LDR_s: + case LDR_d: return true; + default: return false; + } + } +} + + +bool Instruction::IsStore() const { + if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { + return false; + } + + if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) { + return Mask(LoadStorePairLBit) == 0; + } else { + LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreOpMask)); + switch (op) { + case STRB_w: + case STRH_w: + case STR_w: + case STR_x: + case STR_s: + case STR_d: return true; + default: return false; + } + } +} + + // Logical immediates can't encode zero, so a return value of zero is used to // indicate a failure case. Specifically, where the constraints on imm_s are // not met. 
diff --git a/disas/libvixl/a64/instructions-a64.h b/disas/libvixl/a64/instructions-a64.h index 29f972291b..f1d883ccc7 100644 --- a/disas/libvixl/a64/instructions-a64.h +++ b/disas/libvixl/a64/instructions-a64.h @@ -96,6 +96,17 @@ const unsigned kDoubleExponentBits = 11; const unsigned kFloatMantissaBits = 23; const unsigned kFloatExponentBits = 8; +// Floating-point infinity values. +extern const float kFP32PositiveInfinity; +extern const float kFP32NegativeInfinity; +extern const double kFP64PositiveInfinity; +extern const double kFP64NegativeInfinity; + +// The default NaN values (for FPCR.DN=1). +extern const double kFP64DefaultNaN; +extern const float kFP32DefaultNaN; + + enum LSDataSize { LSByte = 0, LSHalfword = 1, @@ -140,33 +151,33 @@ enum Reg31Mode { class Instruction { public: - inline Instr InstructionBits() const { + Instr InstructionBits() const { return *(reinterpret_cast<const Instr*>(this)); } - inline void SetInstructionBits(Instr new_instr) { + void SetInstructionBits(Instr new_instr) { *(reinterpret_cast<Instr*>(this)) = new_instr; } - inline int Bit(int pos) const { + int Bit(int pos) const { return (InstructionBits() >> pos) & 1; } - inline uint32_t Bits(int msb, int lsb) const { + uint32_t Bits(int msb, int lsb) const { return unsigned_bitextract_32(msb, lsb, InstructionBits()); } - inline int32_t SignedBits(int msb, int lsb) const { + int32_t SignedBits(int msb, int lsb) const { int32_t bits = *(reinterpret_cast<const int32_t*>(this)); return signed_bitextract_32(msb, lsb, bits); } - inline Instr Mask(uint32_t mask) const { + Instr Mask(uint32_t mask) const { return InstructionBits() & mask; } #define DEFINE_GETTER(Name, HighBit, LowBit, Func) \ - inline int64_t Name() const { return Func(HighBit, LowBit); } + int64_t Name() const { return Func(HighBit, LowBit); } INSTRUCTION_FIELDS_LIST(DEFINE_GETTER) #undef DEFINE_GETTER @@ -182,56 +193,64 @@ class Instruction { float ImmFP32() const; double ImmFP64() const; - inline LSDataSize SizeLSPair() 
const { + LSDataSize SizeLSPair() const { return CalcLSPairDataSize( static_cast<LoadStorePairOp>(Mask(LoadStorePairMask))); } // Helpers. - inline bool IsCondBranchImm() const { + bool IsCondBranchImm() const { return Mask(ConditionalBranchFMask) == ConditionalBranchFixed; } - inline bool IsUncondBranchImm() const { + bool IsUncondBranchImm() const { return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed; } - inline bool IsCompareBranch() const { + bool IsCompareBranch() const { return Mask(CompareBranchFMask) == CompareBranchFixed; } - inline bool IsTestBranch() const { + bool IsTestBranch() const { return Mask(TestBranchFMask) == TestBranchFixed; } - inline bool IsPCRelAddressing() const { + bool IsPCRelAddressing() const { return Mask(PCRelAddressingFMask) == PCRelAddressingFixed; } - inline bool IsLogicalImmediate() const { + bool IsLogicalImmediate() const { return Mask(LogicalImmediateFMask) == LogicalImmediateFixed; } - inline bool IsAddSubImmediate() const { + bool IsAddSubImmediate() const { return Mask(AddSubImmediateFMask) == AddSubImmediateFixed; } - inline bool IsAddSubExtended() const { + bool IsAddSubExtended() const { return Mask(AddSubExtendedFMask) == AddSubExtendedFixed; } - inline bool IsLoadOrStore() const { + bool IsLoadOrStore() const { return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed; } - inline bool IsMovn() const { + bool IsLoad() const; + bool IsStore() const; + + bool IsLoadLiteral() const { + // This includes PRFM_lit. + return Mask(LoadLiteralFMask) == LoadLiteralFixed; + } + + bool IsMovn() const { return (Mask(MoveWideImmediateMask) == MOVN_x) || (Mask(MoveWideImmediateMask) == MOVN_w); } // Indicate whether Rd can be the stack pointer or the zero register. This // does not check that the instruction actually has an Rd field. - inline Reg31Mode RdMode() const { + Reg31Mode RdMode() const { // The following instructions use sp or wsp as Rd: // Add/sub (immediate) when not setting the flags. 
// Add/sub (extended) when not setting the flags. @@ -260,7 +279,7 @@ class Instruction { // Indicate whether Rn can be the stack pointer or the zero register. This // does not check that the instruction actually has an Rn field. - inline Reg31Mode RnMode() const { + Reg31Mode RnMode() const { // The following instructions use sp or wsp as Rn: // All loads and stores. // Add/sub (immediate). @@ -272,7 +291,7 @@ class Instruction { return Reg31IsZeroRegister; } - inline ImmBranchType BranchType() const { + ImmBranchType BranchType() const { if (IsCondBranchImm()) { return CondBranchType; } else if (IsUncondBranchImm()) { @@ -296,55 +315,66 @@ class Instruction { // Patch a literal load instruction to load from 'source'. void SetImmLLiteral(const Instruction* source); - inline uint8_t* LiteralAddress() const { - int offset = ImmLLiteral() << kLiteralEntrySizeLog2; - const uint8_t* address = reinterpret_cast<const uint8_t*>(this) + offset; - // Note that the result is safely mutable only if the backing buffer is - // safely mutable. - return const_cast<uint8_t*>(address); + // Calculate the address of a literal referred to by a load-literal + // instruction, and return it as the specified type. + // + // The literal itself is safely mutable only if the backing buffer is safely + // mutable. + template <typename T> + T LiteralAddress() const { + uint64_t base_raw = reinterpret_cast<uintptr_t>(this); + ptrdiff_t offset = ImmLLiteral() << kLiteralEntrySizeLog2; + uint64_t address_raw = base_raw + offset; + + // Cast the address using a C-style cast. A reinterpret_cast would be + // appropriate, but it can't cast one integral type to another. + T address = (T)(address_raw); + + // Assert that the address can be represented by the specified type. 
+ VIXL_ASSERT((uint64_t)(address) == address_raw); + + return address; } - inline uint32_t Literal32() const { + uint32_t Literal32() const { uint32_t literal; - memcpy(&literal, LiteralAddress(), sizeof(literal)); - + memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal)); return literal; } - inline uint64_t Literal64() const { + uint64_t Literal64() const { uint64_t literal; - memcpy(&literal, LiteralAddress(), sizeof(literal)); - + memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal)); return literal; } - inline float LiteralFP32() const { + float LiteralFP32() const { return rawbits_to_float(Literal32()); } - inline double LiteralFP64() const { + double LiteralFP64() const { return rawbits_to_double(Literal64()); } - inline const Instruction* NextInstruction() const { + const Instruction* NextInstruction() const { return this + kInstructionSize; } - inline const Instruction* InstructionAtOffset(int64_t offset) const { + const Instruction* InstructionAtOffset(int64_t offset) const { VIXL_ASSERT(IsWordAligned(this + offset)); return this + offset; } - template<typename T> static inline Instruction* Cast(T src) { + template<typename T> static Instruction* Cast(T src) { return reinterpret_cast<Instruction*>(src); } - template<typename T> static inline const Instruction* CastConst(T src) { + template<typename T> static const Instruction* CastConst(T src) { return reinterpret_cast<const Instruction*>(src); } private: - inline int ImmBranch() const; + int ImmBranch() const; void SetPCRelImmTarget(const Instruction* target); void SetBranchImmTarget(const Instruction* target); diff --git a/disas/libvixl/globals.h b/disas/libvixl/globals.h index e28dc6663a..0c2493105d 100644 --- a/disas/libvixl/globals.h +++ b/disas/libvixl/globals.h @@ -58,7 +58,7 @@ const int KBytes = 1024; const int MBytes = 1024 * KBytes; #define VIXL_ABORT() printf("in %s, line %i", __FILE__, __LINE__); abort() -#ifdef DEBUG +#ifdef VIXL_DEBUG #define VIXL_ASSERT(condition) 
assert(condition) #define VIXL_CHECK(condition) VIXL_ASSERT(condition) #define VIXL_UNIMPLEMENTED() printf("UNIMPLEMENTED\t"); VIXL_ABORT() diff --git a/disas/libvixl/utils.cc b/disas/libvixl/utils.cc index 21965d7a1f..80b132a11e 100644 --- a/disas/libvixl/utils.cc +++ b/disas/libvixl/utils.cc @@ -135,4 +135,17 @@ bool IsPowerOf2(int64_t value) { return (value != 0) && ((value & (value - 1)) == 0); } + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) { + VIXL_ASSERT((reg_size % 8) == 0); + int count = 0; + for (unsigned i = 0; i < (reg_size / 16); i++) { + if ((imm & 0xffff) == 0) { + count++; + } + imm >>= 16; + } + return count; +} + } // namespace vixl diff --git a/disas/libvixl/utils.h b/disas/libvixl/utils.h index 1540c3060b..b4406263ac 100644 --- a/disas/libvixl/utils.h +++ b/disas/libvixl/utils.h @@ -166,6 +166,8 @@ int CountSetBits(uint64_t value, int width); uint64_t LowestSetBit(uint64_t value); bool IsPowerOf2(int64_t value); +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size); + // Pointer alignment // TODO: rename/refactor to make it specific to instructions. template<typename T> @@ -174,14 +176,14 @@ bool IsWordAligned(T pointer) { return ((intptr_t)(pointer) & 3) == 0; } -// Increment a pointer until it has the specified alignment. +// Increment a pointer (up to 64 bits) until it has the specified alignment. template<class T> T AlignUp(T pointer, size_t alignment) { // Use C-style casts to get static_cast behaviour for integral types (T), and // reinterpret_cast behaviour for other types. 
- uintptr_t pointer_raw = (uintptr_t)pointer; - VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw)); + uint64_t pointer_raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); size_t align_step = (alignment - pointer_raw) % alignment; VIXL_ASSERT((pointer_raw + align_step) % alignment == 0); @@ -189,14 +191,14 @@ T AlignUp(T pointer, size_t alignment) { return (T)(pointer_raw + align_step); } -// Decrement a pointer until it has the specified alignment. +// Decrement a pointer (up to 64 bits) until it has the specified alignment. template<class T> T AlignDown(T pointer, size_t alignment) { // Use C-style casts to get static_cast behaviour for integral types (T), and // reinterpret_cast behaviour for other types. - uintptr_t pointer_raw = (uintptr_t)pointer; - VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw)); + uint64_t pointer_raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); size_t align_step = pointer_raw % alignment; VIXL_ASSERT((pointer_raw - align_step) % alignment == 0); diff --git a/disas/s390.c b/disas/s390.c index 25499ba419..974460c814 100644 --- a/disas/s390.c +++ b/disas/s390.c @@ -106,10 +106,6 @@ struct s390_opcode static const struct s390_opcode s390_opcodes[]; static const int s390_num_opcodes; -/* A opcode format table for the .insn pseudo mnemonic. */ -static const struct s390_opcode s390_opformats[]; -static const int s390_num_opformats; - /* Values defined for the flags field of a struct powerpc_opcode. */ /* The operands table is an array of struct s390_operand. */ @@ -844,37 +840,6 @@ static const struct s390_operand s390_operands[] = #define MASK_SIY_DRI { 0xff, 0x00, 0x00, 0x00, 0x00, 0xff } /* QEMU-END */ -/* The opcode formats table (blueprints for .insn pseudo mnemonic). 
*/ - -static const struct s390_opcode s390_opformats[] = - { - { "e", OP8(0x00LL), MASK_E, INSTR_E, 3, 0 }, - { "ri", OP8(0x00LL), MASK_RI_RI, INSTR_RI_RI, 3, 0 }, - { "rie", OP8(0x00LL), MASK_RIE_RRP, INSTR_RIE_RRP, 3, 0 }, - { "ril", OP8(0x00LL), MASK_RIL_RP, INSTR_RIL_RP, 3, 0 }, - { "rilu", OP8(0x00LL), MASK_RIL_RU, INSTR_RIL_RU, 3, 0 }, - { "rr", OP8(0x00LL), MASK_RR_RR, INSTR_RR_RR, 3, 0 }, - { "rre", OP8(0x00LL), MASK_RRE_RR, INSTR_RRE_RR, 3, 0 }, - { "rrf", OP8(0x00LL), MASK_RRF_RURR, INSTR_RRF_RURR, 3, 0 }, - { "rs", OP8(0x00LL), MASK_RS_RRRD, INSTR_RS_RRRD, 3, 0 }, - { "rse", OP8(0x00LL), MASK_RSE_RRRD, INSTR_RSE_RRRD, 3, 0 }, - { "rsi", OP8(0x00LL), MASK_RSI_RRP, INSTR_RSI_RRP, 3, 0 }, - { "rsy", OP8(0x00LL), MASK_RSY_RRRD, INSTR_RSY_RRRD, 3, 3 }, - { "rx", OP8(0x00LL), MASK_RX_RRRD, INSTR_RX_RRRD, 3, 0 }, - { "rxe", OP8(0x00LL), MASK_RXE_RRRD, INSTR_RXE_RRRD, 3, 0 }, - { "rxf", OP8(0x00LL), MASK_RXF_RRRDR, INSTR_RXF_RRRDR,3, 0 }, - { "rxy", OP8(0x00LL), MASK_RXY_RRRD, INSTR_RXY_RRRD, 3, 3 }, - { "s", OP8(0x00LL), MASK_S_RD, INSTR_S_RD, 3, 0 }, - { "si", OP8(0x00LL), MASK_SI_URD, INSTR_SI_URD, 3, 0 }, - { "siy", OP8(0x00LL), MASK_SIY_URD, INSTR_SIY_URD, 3, 3 }, - { "ss", OP8(0x00LL), MASK_SS_RRRDRD, INSTR_SS_RRRDRD,3, 0 }, - { "sse", OP8(0x00LL), MASK_SSE_RDRD, INSTR_SSE_RDRD, 3, 0 }, - { "ssf", OP8(0x00LL), MASK_SSF_RRDRD, INSTR_SSF_RRDRD,3, 0 }, -}; - -static const int s390_num_opformats = - sizeof (s390_opformats) / sizeof (s390_opformats[0]); - /* include "s390-opc.tab" generated from opcodes/s390-opc.txt rev 1.17 */ /* The opcode table. This file was generated by s390-mkopc. diff --git a/docs/rcu.txt b/docs/rcu.txt new file mode 100644 index 0000000000..61752b93ab --- /dev/null +++ b/docs/rcu.txt @@ -0,0 +1,387 @@ +Using RCU (Read-Copy-Update) for synchronization +================================================ + +Read-copy update (RCU) is a synchronization mechanism that is used to +protect read-mostly data structures. 
RCU is very efficient and scalable +on the read side (it is wait-free), and thus can make the read paths +extremely fast. + +RCU supports concurrency between a single writer and multiple readers, +thus it is not used alone. Typically, the write-side will use a lock to +serialize multiple updates, but other approaches are possible (e.g., +restricting updates to a single task). In QEMU, when a lock is used, +this will often be the "iothread mutex", also known as the "big QEMU +lock" (BQL). Also, restricting updates to a single task is done in +QEMU using the "bottom half" API. + +RCU is fundamentally a "wait-to-finish" mechanism. The read side marks +sections of code with "critical sections", and the update side will wait +for the execution of all *currently running* critical sections before +proceeding, or before asynchronously executing a callback. + +The key point here is that only the currently running critical sections +are waited for; critical sections that are started _after_ the beginning +of the wait do not extend the wait, despite running concurrently with +the updater. This is the reason why RCU is more scalable than, +for example, reader-writer locks. It is so much more scalable that +the system will have a single instance of the RCU mechanism; a single +mechanism can be used for an arbitrary number of "things", without +having to worry about things such as contention or deadlocks. + +How is this possible? The basic idea is to split updates in two phases, +"removal" and "reclamation". During removal, we ensure that subsequent +readers will not be able to get a reference to the old data. After +removal has completed, a critical section will not be able to access +the old data. Therefore, critical sections that begin after removal +do not matter; as soon as all previous critical sections have finished, +there cannot be any readers who hold references to the data structure, +and these can now be safely reclaimed (e.g., freed or unref'ed). 
+ +Here is a picture: + + thread 1 thread 2 thread 3 + ------------------- ------------------------ ------------------- + enter RCU crit.sec. + | finish removal phase + | begin wait + | | enter RCU crit.sec. + exit RCU crit.sec | | + complete wait | + begin reclamation phase | + exit RCU crit.sec. + + +Note how thread 3 is still executing its critical section when thread 2 +starts reclaiming data. This is possible, because the old version of the +data structure was not accessible at the time thread 3 began executing +that critical section. + + +RCU API +======= + +The core RCU API is small: + + void rcu_read_lock(void); + + Used by a reader to inform the reclaimer that the reader is + entering an RCU read-side critical section. + + void rcu_read_unlock(void); + + Used by a reader to inform the reclaimer that the reader is + exiting an RCU read-side critical section. Note that RCU + read-side critical sections may be nested and/or overlapping. + + void synchronize_rcu(void); + + Blocks until all pre-existing RCU read-side critical sections + on all threads have completed. This marks the end of the removal + phase and the beginning of the reclamation phase. + + Note that it would be valid for another update to come while + synchronize_rcu is running. Because of this, it is better that + the updater releases any locks it may hold before calling + synchronize_rcu. If this is not possible (for example, because + the updater is protected by the BQL), you can use call_rcu. + + void call_rcu1(struct rcu_head * head, + void (*func)(struct rcu_head *head)); + + This function invokes func(head) after all pre-existing RCU + read-side critical sections on all threads have completed. This + marks the end of the removal phase, with func taking care + asynchronously of the reclamation phase.
+ + The foo struct needs to have an rcu_head structure added, + perhaps as follows: + + struct foo { + struct rcu_head rcu; + int a; + char b; + long c; + }; + + so that the reclaimer function can fetch the struct foo address + and free it: + + call_rcu1(&foo.rcu, foo_reclaim); + + void foo_reclaim(struct rcu_head *rp) + { + struct foo *fp = container_of(rp, struct foo, rcu); + g_free(fp); + } + + For the common case where the rcu_head member is the first field of the + struct, you can use the following macro. + + void call_rcu(T *p, + void (*func)(T *p), + field-name); + + call_rcu1 is typically used through this macro, in the common case + where the "struct rcu_head" is the first field in the struct. In + the above case, one could have written simply: + + call_rcu(&foo, g_free, rcu); + + typeof(*p) atomic_rcu_read(p); + + atomic_rcu_read() is similar to atomic_mb_read(), but it makes + some assumptions on the code that calls it. This allows a more + optimized implementation. + + atomic_rcu_read assumes that whenever a single RCU critical + section reads multiple shared data, these reads are either + data-dependent or need no ordering. This is almost always the + case when using RCU, because read-side critical sections typically + navigate one or more pointers (the pointers that are changed on + every update) until reaching a data structure of interest, + and then read from there. + + RCU read-side critical sections must use atomic_rcu_read() to + read data, unless concurrent writes are prevented by another + synchronization mechanism. + + Furthermore, RCU read-side critical sections should traverse the + data structure in a single direction, opposite to the direction + in which the updater initializes it. + + void atomic_rcu_set(p, typeof(*p) v); + + atomic_rcu_set() is also similar to atomic_mb_set(), and it also + makes assumptions on the code that calls it in order to allow a more + optimized implementation.
+ + In particular, atomic_rcu_set() suffices for synchronization + with readers, if the updater never mutates a field within a + data item that is already accessible to readers. This is the + case when initializing a new copy of the RCU-protected data + structure; just ensure that initialization of *p is carried out + before atomic_rcu_set() makes the data item visible to readers. + If this rule is observed, writes will happen in the opposite + order as reads in the RCU read-side critical sections (or if + there is just one update), and there will be no need for other + synchronization mechanism to coordinate the accesses. + +The following APIs must be used before RCU is used in a thread: + + void rcu_register_thread(void); + + Mark a thread as taking part in the RCU mechanism. Such a thread + will have to report quiescent points regularly, either manually + or through the QemuCond/QemuSemaphore/QemuEvent APIs. + + void rcu_unregister_thread(void); + + Mark a thread as not taking part anymore in the RCU mechanism. + It is not a problem if such a thread reports quiescent points, + either manually or by using the QemuCond/QemuSemaphore/QemuEvent + APIs. + +Note that these APIs are relatively heavyweight, and should _not_ be +nested. + + +DIFFERENCES WITH LINUX +====================== + +- Waiting on a mutex is possible, though discouraged, within an RCU critical + section. This is because spinlocks are rarely (if ever) used in userspace + programming; not allowing this would prevent upgrading an RCU read-side + critical section to become an updater. + +- atomic_rcu_read and atomic_rcu_set replace rcu_dereference and + rcu_assign_pointer. They take a _pointer_ to the variable being accessed. + +- call_rcu is a macro that has an extra argument (the name of the first + field in the struct, which must be a struct rcu_head), and expects the + type of the callback's argument to be the type of the first argument. + call_rcu1 is the same as Linux's call_rcu. 
+ + +RCU PATTERNS +============ + +Many patterns using reader-writer locks translate directly to RCU, with +the advantages of higher scalability and deadlock immunity. + +In general, RCU can be used whenever it is possible to create a new +"version" of a data structure every time the updater runs. This may +sound like a very strict restriction, however: + +- the updater does not mean "everything that writes to a data structure", + but rather "everything that involves a reclamation step". See the + array example below. + +- in some cases, creating a new version of a data structure may actually + be very cheap. For example, modifying the "next" pointer of a singly + linked list is effectively creating a new version of the list. + +Here are some frequently-used RCU idioms that are worth noting. + + +RCU list processing +------------------- + +TBD (not yet used in QEMU) + + +RCU reference counting +---------------------- + +Because grace periods are not allowed to complete while there is an RCU +read-side critical section in progress, the RCU read-side primitives +may be used as a restricted reference-counting mechanism. For example, +consider the following code fragment: + + rcu_read_lock(); + p = atomic_rcu_read(&foo); + /* do something with p. */ + rcu_read_unlock(); + +The RCU read-side critical section ensures that the value of "p" remains +valid until after the rcu_read_unlock(). In some sense, it is acquiring +a reference to p that is later released when the critical section ends. +The write side looks simply like this (with appropriate locking): + + qemu_mutex_lock(&foo_mutex); + old = foo; + atomic_rcu_set(&foo, new); + qemu_mutex_unlock(&foo_mutex); + synchronize_rcu(); + free(old); + +If the processing cannot be done purely within the critical section, it +is possible to combine this idiom with a "real" reference count: + + rcu_read_lock(); + p = atomic_rcu_read(&foo); + foo_ref(p); + rcu_read_unlock(); + /* do something with p.
*/ + foo_unref(p); + +The write side can be like this: + + qemu_mutex_lock(&foo_mutex); + old = foo; + atomic_rcu_set(&foo, new); + qemu_mutex_unlock(&foo_mutex); + synchronize_rcu(); + foo_unref(old); + +or with call_rcu: + + qemu_mutex_lock(&foo_mutex); + old = foo; + atomic_rcu_set(&foo, new); + qemu_mutex_unlock(&foo_mutex); + call_rcu(old, foo_unref, rcu); + +In both cases, the write side only performs removal. Reclamation +happens when the last reference to a "foo" object is dropped. +Using synchronize_rcu() is undesirably expensive, because the +last reference may be dropped on the read side. Hence you can +use call_rcu() instead: + + foo_unref(struct foo *p) { + if (atomic_fetch_dec(&p->refcount) == 1) { + call_rcu(p, foo_destroy, rcu); + } + } + + +Note that the same idioms would be possible with reader/writer +locks: + + read_lock(&foo_rwlock); write_mutex_lock(&foo_rwlock); + p = foo; p = foo; + /* do something with p. */ foo = new; + read_unlock(&foo_rwlock); free(p); + write_mutex_unlock(&foo_rwlock); + free(p); + + ------------------------------------------------------------------ + + read_lock(&foo_rwlock); write_mutex_lock(&foo_rwlock); + p = foo; old = foo; + foo_ref(p); foo = new; + read_unlock(&foo_rwlock); foo_unref(old); + /* do something with p. */ write_mutex_unlock(&foo_rwlock); + read_lock(&foo_rwlock); + foo_unref(p); + read_unlock(&foo_rwlock); + +foo_unref could use a mechanism such as bottom halves to move deallocation +out of the write-side critical section. + + +RCU resizable arrays +-------------------- + +Resizable arrays can be used with RCU. The expensive RCU synchronization +(or call_rcu) only needs to take place when the array is resized. +The two items to take care of are: + +- ensuring that the old version of the array is available between removal + and reclamation; + +- avoiding mismatches in the read side between the array data and the + array size. + +The first problem is avoided simply by not using realloc.
Instead, +each resize will allocate a new array and copy the old data into it. +The second problem would arise if the size and the data pointers were +two members of a larger struct: + + struct mystuff { + ... + int data_size; + int data_alloc; + T *data; + ... + }; + +Instead, we store the size of the array with the array itself: + + struct arr { + int size; + int alloc; + T data[]; + }; + struct arr *global_array; + + read side: + rcu_read_lock(); + struct arr *array = atomic_rcu_read(&global_array); + x = i < array->size ? array->data[i] : -1; + rcu_read_unlock(); + return x; + + write side (running under a lock): + if (global_array->size == global_array->alloc) { + /* Creating a new version. */ + new_array = g_malloc(sizeof(struct arr) + + global_array->alloc * 2 * sizeof(T)); + new_array->size = global_array->size; + new_array->alloc = global_array->alloc * 2; + memcpy(new_array->data, global_array->data, + global_array->alloc * sizeof(T)); + + /* Removal phase. */ + old_array = global_array; + atomic_rcu_set(&global_array, new_array); + synchronize_rcu(); + + /* Reclamation phase.
*/ + free(old_array); + } + + +SOURCES +======= + +* Documentation/RCU/ from the Linux kernel @@ -535,6 +535,7 @@ out: qapi_free_VncInfo(info); } +#ifdef CONFIG_SPICE void hmp_info_spice(Monitor *mon, const QDict *qdict) { SpiceChannelList *chan; @@ -581,6 +582,7 @@ void hmp_info_spice(Monitor *mon, const QDict *qdict) out: qapi_free_SpiceInfo(info); } +#endif void hmp_info_balloon(Monitor *mon, const QDict *qdict) { diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c index 71262bccd2..e75aa8772e 100644 --- a/hw/9pfs/virtio-9p-synth.c +++ b/hw/9pfs/virtio-9p-synth.c @@ -17,6 +17,7 @@ #include "virtio-9p-xattr.h" #include "fsdev/qemu-fsdev.h" #include "virtio-9p-synth.h" +#include "qemu/rcu.h" #include <sys/stat.h> diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index ef24ca40fc..c6eab6de30 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -163,30 +163,23 @@ static void armv7m_reset(void *opaque) } /* Init CPU and memory for a v7-M based board. - flash_size and sram_size are in kb. + mem_size is in bytes. Returns the NVIC array. */ -qemu_irq *armv7m_init(MemoryRegion *system_memory, - int flash_size, int sram_size, +qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model) { ARMCPU *cpu; CPUARMState *env; DeviceState *nvic; - /* FIXME: make this local state. */ - static qemu_irq pic[64]; + qemu_irq *pic = g_new(qemu_irq, num_irq); int image_size; uint64_t entry; uint64_t lowaddr; int i; int big_endian; - MemoryRegion *sram = g_new(MemoryRegion, 1); - MemoryRegion *flash = g_new(MemoryRegion, 1); MemoryRegion *hack = g_new(MemoryRegion, 1); - flash_size *= 1024; - sram_size *= 1024; - if (cpu_model == NULL) { cpu_model = "cortex-m3"; } @@ -197,35 +190,15 @@ qemu_irq *armv7m_init(MemoryRegion *system_memory, } env = &cpu->env; -#if 0 - /* > 32Mb SRAM gets complicated because it overlaps the bitband area. 
- We don't have proper commandline options, so allocate half of memory - as SRAM, up to a maximum of 32Mb, and the rest as code. */ - if (ram_size > (512 + 32) * 1024 * 1024) - ram_size = (512 + 32) * 1024 * 1024; - sram_size = (ram_size / 2) & TARGET_PAGE_MASK; - if (sram_size > 32 * 1024 * 1024) - sram_size = 32 * 1024 * 1024; - code_size = ram_size - sram_size; -#endif - - /* Flash programming is done via the SCU, so pretend it is ROM. */ - memory_region_init_ram(flash, NULL, "armv7m.flash", flash_size, - &error_abort); - vmstate_register_ram_global(flash); - memory_region_set_readonly(flash, true); - memory_region_add_subregion(system_memory, 0, flash); - memory_region_init_ram(sram, NULL, "armv7m.sram", sram_size, &error_abort); - vmstate_register_ram_global(sram); - memory_region_add_subregion(system_memory, 0x20000000, sram); armv7m_bitband_init(); nvic = qdev_create(NULL, "armv7m_nvic"); + qdev_prop_set_uint32(nvic, "num-irq", num_irq); env->nvic = nvic; qdev_init_nofail(nvic); sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0, qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ)); - for (i = 0; i < 64; i++) { + for (i = 0; i < num_irq; i++) { pic[i] = qdev_get_gpio_in(nvic, i); } @@ -244,7 +217,7 @@ qemu_irq *armv7m_init(MemoryRegion *system_memory, image_size = load_elf(kernel_filename, NULL, NULL, &entry, &lowaddr, NULL, big_endian, ELF_MACHINE, 1); if (image_size < 0) { - image_size = load_image_targphys(kernel_filename, 0, flash_size); + image_size = load_image_targphys(kernel_filename, 0, mem_size); lowaddr = 0; } if (image_size < 0) { diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 52ebd8be9b..a48d1b28d4 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -463,8 +463,26 @@ static void do_cpu_reset(void *opaque) * (SCR.NS = 0), we change that here if non-secure boot has been * requested. 
*/ - if (arm_feature(env, ARM_FEATURE_EL3) && !info->secure_boot) { - env->cp15.scr_el3 |= SCR_NS; + if (arm_feature(env, ARM_FEATURE_EL3)) { + /* AArch64 is defined to come out of reset into EL3 if enabled. + * If we are booting Linux then we need to adjust our EL as + * Linux expects us to be in EL2 or EL1. AArch32 resets into + * SVC, which Linux expects, so no privilege/exception level to + * adjust. + */ + if (env->aarch64) { + if (arm_feature(env, ARM_FEATURE_EL2)) { + env->pstate = PSTATE_MODE_EL2h; + } else { + env->pstate = PSTATE_MODE_EL1h; + } + } + + /* Set to non-secure if not a secure boot */ + if (!info->secure_boot) { + /* Linux expects non-secure state */ + env->cp15.scr_el3 |= SCR_NS; + } } if (CPU(cpu) == first_cpu) { diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index ccc3b189c3..cb515ec765 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -29,6 +29,8 @@ #define BP_OLED_SSI 0x02 #define BP_GAMEPAD 0x04 +#define NUM_IRQ_LINES 64 + typedef const struct { const char *name; uint32_t did0; @@ -1220,10 +1222,27 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model, int i; int j; - flash_size = ((board->dc0 & 0xffff) + 1) << 1; - sram_size = (board->dc0 >> 18) + 1; - pic = armv7m_init(get_system_memory(), - flash_size, sram_size, kernel_filename, cpu_model); + MemoryRegion *sram = g_new(MemoryRegion, 1); + MemoryRegion *flash = g_new(MemoryRegion, 1); + MemoryRegion *system_memory = get_system_memory(); + + flash_size = (((board->dc0 & 0xffff) + 1) << 1) * 1024; + sram_size = ((board->dc0 >> 18) + 1) * 1024; + + /* Flash programming is done via the SCU, so pretend it is ROM. 
*/ + memory_region_init_ram(flash, NULL, "stellaris.flash", flash_size, + &error_abort); + vmstate_register_ram_global(flash); + memory_region_set_readonly(flash, true); + memory_region_add_subregion(system_memory, 0, flash); + + memory_region_init_ram(sram, NULL, "stellaris.sram", sram_size, + &error_abort); + vmstate_register_ram_global(sram); + memory_region_add_subregion(system_memory, 0x20000000, sram); + + pic = armv7m_init(system_memory, flash_size, NUM_IRQ_LINES, + kernel_filename, cpu_model); if (board->dc1 & (1 << 16)) { dev = sysbus_create_varargs(TYPE_STELLARIS_ADC, 0x40038000, diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 235344034d..34d9379032 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -441,10 +441,32 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic) int i; hwaddr size = vbi->memmap[VIRT_MMIO].size; - /* Note that we have to create the transports in forwards order - * so that command line devices are inserted lowest address first, - * and then add dtb nodes in reverse order so that they appear in - * the finished device tree lowest address first. + /* We create the transports in forwards order. Since qbus_realize() + * prepends (not appends) new child buses, the incrementing loop below will + * create a list of virtio-mmio buses with decreasing base addresses. + * + * When a -device option is processed from the command line, + * qbus_find_recursive() picks the next free virtio-mmio bus in forwards + * order. The upshot is that -device options in increasing command line + * order are mapped to virtio-mmio buses with decreasing base addresses. + * + * When this code was originally written, that arrangement ensured that the + * guest Linux kernel would give the lowest "name" (/dev/vda, eth0, etc) to + * the first -device on the command line. (The end-to-end order is a + * function of this loop, qbus_realize(), qbus_find_recursive(), and the + * guest kernel's name-to-address assignment strategy.) 
+ * + * Meanwhile, the kernel's traversal seems to have been reversed; see eg. + * the message, if not necessarily the code, of commit 70161ff336. + * Therefore the loop now establishes the inverse of the original intent. + * + * Unfortunately, we can't counteract the kernel change by reversing the + * loop; it would break existing command lines. + * + * In any case, the kernel makes no guarantee about the stability of + * enumeration order of virtio devices (as demonstrated by it changing + * between kernel versions). For reliable and stable identification + * of disks users must use UUIDs or similar mechanisms. */ for (i = 0; i < NUM_VIRTIO_TRANSPORTS; i++) { int irq = vbi->irqmap[VIRT_MMIO] + i; @@ -453,6 +475,13 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic) sysbus_create_simple("virtio-mmio", base, pic[irq]); } + /* We add dtb nodes in reverse order so that they appear in the finished + * device tree lowest address first. + * + * Note that this mapping is independent of the loop above. The previous + * loop influences virtio device to virtio transport assignment, whereas + * this loop controls how virtio transports are laid out in the dtb. + */ for (i = NUM_VIRTIO_TRANSPORTS - 1; i >= 0; i--) { char *nodename; int irq = vbi->irqmap[VIRT_MMIO] + i; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index d5e0e419c2..d50893002d 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -513,7 +513,7 @@ void pci_device_save(PCIDevice *s, QEMUFile *f) * This makes us compatible with old devices * which never set or clear this bit. */ s->config[PCI_STATUS] &= ~PCI_STATUS_INTERRUPT; - vmstate_save_state(f, pci_get_vmstate(s), s); + vmstate_save_state(f, pci_get_vmstate(s), s, NULL); /* Restore the interrupt status bit. 
*/ pci_update_irq_status(s); } diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 3b77c9a227..4ba8409668 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -62,6 +62,7 @@ typedef struct S390IPLState { static int s390_ipl_init(SysBusDevice *dev) { S390IPLState *ipl = S390_IPL(dev); + uint64_t pentry = KERN_IMAGE_START; int kernel_size; if (!ipl->kernel) { @@ -94,31 +95,31 @@ static int s390_ipl_init(SysBusDevice *dev) hw_error("could not load bootloader '%s'\n", bios_name); } return 0; + } + + kernel_size = load_elf(ipl->kernel, NULL, NULL, &pentry, NULL, + NULL, 1, ELF_MACHINE, 0); + if (kernel_size < 0) { + kernel_size = load_image_targphys(ipl->kernel, 0, ram_size); + } + if (kernel_size < 0) { + fprintf(stderr, "could not load kernel '%s'\n", ipl->kernel); + return -1; + } + /* + * Is it a Linux kernel (starting at 0x10000)? If yes, we fill in the + * kernel parameters here as well. Note: For old kernels (up to 3.2) + * we can not rely on the ELF entry point - it was 0x800 (the SALIPL + * loader) and it won't work. For this case we force it to 0x10000, too. + */ + if (pentry == KERN_IMAGE_START || pentry == 0x800) { + ipl->start_addr = KERN_IMAGE_START; + /* Overwrite parameters in the kernel image, which are "rom" */ + strcpy(rom_ptr(KERN_PARM_AREA), ipl->cmdline); } else { - uint64_t pentry = KERN_IMAGE_START; - kernel_size = load_elf(ipl->kernel, NULL, NULL, &pentry, NULL, - NULL, 1, ELF_MACHINE, 0); - if (kernel_size < 0) { - kernel_size = load_image_targphys(ipl->kernel, 0, ram_size); - } - if (kernel_size < 0) { - fprintf(stderr, "could not load kernel '%s'\n", ipl->kernel); - return -1; - } - /* - * Is it a Linux kernel (starting at 0x10000)? If yes, we fill in the - * kernel parameters here as well. Note: For old kernels (up to 3.2) - * we can not rely on the ELF entry point - it was 0x800 (the SALIPL - * loader) and it won't work. For this case we force it to 0x10000, too. 
- */ - if (pentry == KERN_IMAGE_START || pentry == 0x800) { - ipl->start_addr = KERN_IMAGE_START; - /* Overwrite parameters in the kernel image, which are "rom" */ - strcpy(rom_ptr(KERN_PARM_AREA), ipl->cmdline); - } else { - ipl->start_addr = pentry; - } + ipl->start_addr = pentry; } + if (ipl->initrd) { ram_addr_t initrd_offset; int initrd_size; diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 1201b8d57c..dc455a2bb7 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -170,7 +170,7 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh) S390pciState *s = S390_PCI_HOST_BRIDGE( object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); - if (!s) { + if (!s || !fh) { return NULL; } @@ -187,7 +187,7 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh) static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh, uint32_t fid, uint64_t faddr, uint32_t e) { - SeiContainer *sei_cont = g_malloc0(sizeof(SeiContainer)); + SeiContainer *sei_cont; S390pciState *s = S390_PCI_HOST_BRIDGE( object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); @@ -195,6 +195,7 @@ static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh, return; } + sei_cont = g_malloc0(sizeof(SeiContainer)); sei_cont->fh = fh; sei_cont->fid = fid; sei_cont->cc = cc; diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 5ea13e5d79..9e5bc5b899 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -487,7 +487,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) CPUS390XState *env = &cpu->env; uint32_t fh; S390PCIBusDevice *pbdev; - ram_addr_t size; + hwaddr start, end; IOMMUTLBEntry entry; MemoryRegion *mr; @@ -504,7 +504,8 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) } fh = env->regs[r1] >> 32; - size = env->regs[r2 + 1]; + start = env->regs[r2]; + end = start + env->regs[r2 + 1]; pbdev = s390_pci_find_dev_by_fh(fh); @@ -515,15 +516,18 @@ int rpcit_service_call(S390CPU *cpu, 
uint8_t r1, uint8_t r2) } mr = pci_device_iommu_address_space(pbdev->pdev)->root; - entry = mr->iommu_ops->translate(mr, env->regs[r2], 0); + while (start < end) { + entry = mr->iommu_ops->translate(mr, start, 0); - if (!entry.translated_addr) { - setcc(cpu, ZPCI_PCI_LS_ERR); - goto out; + if (!entry.translated_addr) { + setcc(cpu, ZPCI_PCI_LS_ERR); + goto out; + } + + memory_region_notify_iommu(mr, entry); + start += entry.addr_mask + 1; } - entry.addr_mask = size - 1; - memory_region_notify_iommu(mr, entry); setcc(cpu, ZPCI_PCI_LS_OK); out: return 0; @@ -784,10 +788,10 @@ int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba) stq_p(&fib.aisb, pbdev->routes.adapter.summary_addr); stq_p(&fib.fmb_addr, pbdev->fmb_addr); - data = (pbdev->isc << 28) | (pbdev->noi << 16) | - (pbdev->routes.adapter.ind_offset << 8) | (pbdev->sum << 7) | - pbdev->routes.adapter.summary_offset; - stw_p(&fib.data, data); + data = ((uint32_t)pbdev->isc << 28) | ((uint32_t)pbdev->noi << 16) | + ((uint32_t)pbdev->routes.adapter.ind_offset << 8) | + ((uint32_t)pbdev->sum << 7) | pbdev->routes.adapter.summary_offset; + stl_p(&fib.data, data); if (pbdev->fh >> ENABLE_BIT_OFFSET) { fib.fc |= 0x80; diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 9b740a3cfa..db39ae0e23 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1756,6 +1756,8 @@ void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier) req->io_canceled = true; if (req->aiocb) { blk_aio_cancel_async(req->aiocb); + } else { + scsi_req_cancel_complete(req); } } diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c index 20b20f0bae..36392359e3 100644 --- a/hw/scsi/spapr_vscsi.c +++ b/hw/scsi/spapr_vscsi.c @@ -630,7 +630,7 @@ static void vscsi_save_request(QEMUFile *f, SCSIRequest *sreq) vscsi_req *req = sreq->hba_private; assert(req->active); - vmstate_save_state(f, &vmstate_spapr_vscsi_req, req); + vmstate_save_state(f, &vmstate_spapr_vscsi_req, req, NULL); DPRINTF("VSCSI: saving tag=%u, current 
desc#%d, offset=%x\n", req->qtag, req->cur_desc_num, req->cur_desc_offset); diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index 5a107fad5d..0600c9a1fa 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -734,7 +734,7 @@ static int rtc_post_load(void *opaque, int version_id) } static const VMStateDescription vmstate_rtc_irq_reinject_on_ack_count = { - .name = "irq_reinject_on_ack_count", + .name = "mc146818rtc/irq_reinject_on_ack_count", .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { diff --git a/hw/vfio/common.c b/hw/vfio/common.c index cf483fffa9..e71385e4fe 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -32,7 +32,7 @@ #include "trace.h" struct vfio_group_head vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_address_spaces); + QLIST_HEAD_INITIALIZER(vfio_group_list); struct vfio_as_head vfio_address_spaces = QLIST_HEAD_INITIALIZER(vfio_address_spaces); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 014a92ce5f..29caabc149 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3065,6 +3065,7 @@ static void vfio_put_device(VFIOPCIDevice *vdev) { g_free(vdev->vbasedev.name); if (vdev->msix) { + object_unparent(OBJECT(&vdev->msix->mmap_mem)); g_free(vdev->msix); vdev->msix = NULL; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 013979a6b8..d735343ca8 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -955,7 +955,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) } /* Subsections */ - vmstate_save_state(f, &vmstate_virtio, vdev); + vmstate_save_state(f, &vmstate_virtio, vdev, NULL); } int virtio_set_features(VirtIODevice *vdev, uint32_t val) diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 0e825ea773..1673287189 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -244,9 +244,31 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx); #undef MEMSUFFIX #endif /* (NB_MMU_MODES >= 6) */ -#if (NB_MMU_MODES > 6) -#error 
"NB_MMU_MODES > 6 is not supported for now" -#endif /* (NB_MMU_MODES > 6) */ +#if (NB_MMU_MODES >= 7) && defined(MMU_MODE6_SUFFIX) + +#define CPU_MMU_INDEX 6 +#define MEMSUFFIX MMU_MODE6_SUFFIX +#define DATA_SIZE 1 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 2 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 4 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 8 +#include "exec/cpu_ldst_template.h" +#undef CPU_MMU_INDEX +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 7) */ + +#if (NB_MMU_MODES > 7) +/* Note that supporting NB_MMU_MODES == 9 would require + * changes to at least the ARM TCG backend. + */ +#error "NB_MMU_MODES > 7 is not supported for now" +#endif /* (NB_MMU_MODES > 7) */ /* these access are slower, they must be as rare as possible */ #define CPU_MMU_INDEX (cpu_mmu_index(env)) diff --git a/include/exec/memory.h b/include/exec/memory.h index 0cd96b152e..06ffa1d185 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -33,6 +33,7 @@ #include "qemu/notify.h" #include "qapi/error.h" #include "qom/object.h" +#include "qemu/rcu.h" #define MAX_PHYS_ADDR_SPACE_BITS 62 #define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1) @@ -207,9 +208,13 @@ struct MemoryListener { */ struct AddressSpace { /* All fields are private. */ + struct rcu_head rcu; char *name; MemoryRegion *root; + + /* Accessed via RCU. 
*/ struct FlatView *current_map; + int ioeventfd_nb; struct MemoryRegionIoeventfd *ioeventfds; struct AddressSpaceDispatch *dispatch; diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h index c4bf56d44f..5c940eb412 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/arm.h @@ -15,8 +15,7 @@ #include "hw/irq.h" /* armv7m.c */ -qemu_irq *armv7m_init(MemoryRegion *system_memory, - int flash_size, int sram_size, +qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model); /* arm_boot.c */ diff --git a/include/migration/migration.h b/include/migration/migration.h index 3cb5ba80c3..f37348b619 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -33,6 +33,7 @@ #define QEMU_VM_SECTION_END 0x03 #define QEMU_VM_SECTION_FULL 0x04 #define QEMU_VM_SUBSECTION 0x05 +#define QEMU_VM_VMDESCRIPTION 0x06 struct MigrationParams { bool blk; diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index d843c0010c..a923cec2a6 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -121,6 +121,7 @@ QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); int64_t qemu_ftell(QEMUFile *f); +int64_t qemu_ftell_fast(QEMUFile *f); void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size); void qemu_put_byte(QEMUFile *f, int v); /* diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index fa307a6c0f..0b26bc68dd 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -29,6 +29,7 @@ #ifndef CONFIG_USER_ONLY #include <migration/qemu-file.h> #endif +#include <qjson.h> typedef void SaveStateHandler(QEMUFile *f, void *opaque); typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); @@ -801,7 +802,7 @@ extern const VMStateInfo vmstate_info_bitmap; int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void 
*opaque, int version_id); void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, - void *opaque); + void *opaque, QJSON *vmdesc); int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, const VMStateDescription *vmsd, diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h index 0ca6cbd0e6..eeaf0cb981 100644 --- a/include/qapi/qmp/qerror.h +++ b/include/qapi/qmp/qerror.h @@ -52,9 +52,6 @@ void qerror_report_err(Error *err); #define QERR_BUS_NOT_FOUND \ ERROR_CLASS_GENERIC_ERROR, "Bus '%s' not found" -#define QERR_COMMAND_NOT_FOUND \ - ERROR_CLASS_COMMAND_NOT_FOUND, "The command %s has not been found" - #define QERR_DEVICE_ENCRYPTED \ ERROR_CLASS_DEVICE_ENCRYPTED, "'%s' (%s) is encrypted" @@ -73,9 +70,6 @@ void qerror_report_err(Error *err); #define QERR_DEVICE_NO_HOTPLUG \ ERROR_CLASS_GENERIC_ERROR, "Device '%s' does not support hotplugging" -#define QERR_DEVICE_NOT_ACTIVE \ - ERROR_CLASS_DEVICE_NOT_ACTIVE, "No %s device has been activated" - #define QERR_DEVICE_NOT_ENCRYPTED \ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is not encrypted" @@ -112,9 +106,6 @@ void qerror_report_err(Error *err); #define QERR_JSON_PARSING \ ERROR_CLASS_GENERIC_ERROR, "Invalid JSON syntax" -#define QERR_KVM_MISSING_CAP \ - ERROR_CLASS_KVM_MISSING_CAP, "Using KVM without %s, %s unavailable" - #define QERR_MIGRATION_ACTIVE \ ERROR_CLASS_GENERIC_ERROR, "There's a migration process in progress" diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 93c2ae2f37..98e05ca875 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -129,6 +129,67 @@ #define atomic_set(ptr, i) ((*(__typeof__(*ptr) volatile*) (ptr)) = (i)) #endif +/** + * atomic_rcu_read - reads a RCU-protected pointer to a local variable + * into a RCU read-side critical section. The pointer can later be safely + * dereferenced within the critical section. + * + * This ensures that the pointer copy is invariant thorough the whole critical + * section. 
+ * + * Inserts memory barriers on architectures that require them (currently only + * Alpha) and documents which pointers are protected by RCU. + * + * Unless the __ATOMIC_CONSUME memory order is available, atomic_rcu_read also + * includes a compiler barrier to ensure that value-speculative optimizations + * (e.g. VSS: Value Speculation Scheduling) does not perform the data read + * before the pointer read by speculating the value of the pointer. On new + * enough compilers, atomic_load takes care of such concern about + * dependency-breaking optimizations. + * + * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg(). + */ +#ifndef atomic_rcu_read +#ifdef __ATOMIC_CONSUME +#define atomic_rcu_read(ptr) ({ \ + typeof(*ptr) _val; \ + __atomic_load(ptr, &_val, __ATOMIC_CONSUME); \ + _val; \ +}) +#else +#define atomic_rcu_read(ptr) ({ \ + typeof(*ptr) _val = atomic_read(ptr); \ + smp_read_barrier_depends(); \ + _val; \ +}) +#endif +#endif + +/** + * atomic_rcu_set - assigns (publicizes) a pointer to a new data structure + * meant to be read by RCU read-side critical sections. + * + * Documents which pointers will be dereferenced by RCU read-side critical + * sections and adds the required memory barriers on architectures requiring + * them. It also makes sure the compiler does not reorder code initializing the + * data structure before its publication. + * + * Should match atomic_rcu_read(). + */ +#ifndef atomic_rcu_set +#ifdef __ATOMIC_RELEASE +#define atomic_rcu_set(ptr, i) do { \ + typeof(*ptr) _val = (i); \ + __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \ +} while(0) +#else +#define atomic_rcu_set(ptr, i) do { \ + smp_wmb(); \ + atomic_set(ptr, i); \ +} while (0) +#endif +#endif + /* These have the same semantics as Java volatile variables. * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html: * "1. Issue a StoreStore barrier (wmb) before each volatile store." 
diff --git a/include/qemu/queue.h b/include/qemu/queue.h index a98eb3ad79..c602797652 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -104,6 +104,19 @@ struct { \ (head)->lh_first = NULL; \ } while (/*CONSTCOND*/0) +#define QLIST_SWAP(dstlist, srclist, field) do { \ + void *tmplist; \ + tmplist = (srclist)->lh_first; \ + (srclist)->lh_first = (dstlist)->lh_first; \ + if ((srclist)->lh_first != NULL) { \ + (srclist)->lh_first->field.le_prev = &(srclist)->lh_first; \ + } \ + (dstlist)->lh_first = tmplist; \ + if ((dstlist)->lh_first != NULL) { \ + (dstlist)->lh_first->field.le_prev = &(dstlist)->lh_first; \ + } \ +} while (/*CONSTCOND*/0) + #define QLIST_INSERT_AFTER(listelm, elm, field) do { \ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ (listelm)->field.le_next->field.le_prev = \ diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h new file mode 100644 index 0000000000..068a279a79 --- /dev/null +++ b/include/qemu/rcu.h @@ -0,0 +1,147 @@ +#ifndef QEMU_RCU_H +#define QEMU_RCU_H + +/* + * urcu-mb.h + * + * Userspace RCU header with explicit memory barrier. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. 
+ */ + +#include <stdlib.h> +#include <assert.h> +#include <limits.h> +#include <unistd.h> +#include <stdint.h> +#include <stdbool.h> +#include <glib.h> + +#include "qemu/compiler.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Important ! + * + * Each thread containing read-side critical sections must be registered + * with rcu_register_thread() before calling rcu_read_lock(). + * rcu_unregister_thread() should be called before the thread exits. + */ + +#ifdef DEBUG_RCU +#define rcu_assert(args...) assert(args) +#else +#define rcu_assert(args...) +#endif + +/* + * Global quiescent period counter with low-order bits unused. + * Using a int rather than a char to eliminate false register dependencies + * causing stalls on some architectures. + */ +extern unsigned long rcu_gp_ctr; + +extern QemuEvent rcu_gp_event; + +struct rcu_reader_data { + /* Data used by both reader and synchronize_rcu() */ + unsigned long ctr; + bool waiting; + + /* Data used by reader only */ + unsigned depth; + + /* Data used for registry, protected by rcu_gp_lock */ + QLIST_ENTRY(rcu_reader_data) node; +}; + +extern __thread struct rcu_reader_data rcu_reader; + +static inline void rcu_read_lock(void) +{ + struct rcu_reader_data *p_rcu_reader = &rcu_reader; + unsigned ctr; + + if (p_rcu_reader->depth++ > 0) { + return; + } + + ctr = atomic_read(&rcu_gp_ctr); + atomic_xchg(&p_rcu_reader->ctr, ctr); + if (atomic_read(&p_rcu_reader->waiting)) { + atomic_set(&p_rcu_reader->waiting, false); + qemu_event_set(&rcu_gp_event); + } +} + +static inline void rcu_read_unlock(void) +{ + struct rcu_reader_data *p_rcu_reader = &rcu_reader; + + assert(p_rcu_reader->depth != 0); + if (--p_rcu_reader->depth > 0) { + return; + } + + atomic_xchg(&p_rcu_reader->ctr, 0); + if (atomic_read(&p_rcu_reader->waiting)) { + atomic_set(&p_rcu_reader->waiting, false); + qemu_event_set(&rcu_gp_event); + } +} + +extern void 
synchronize_rcu(void); + +/* + * Reader thread registration. + */ +extern void rcu_register_thread(void); +extern void rcu_unregister_thread(void); + +struct rcu_head; +typedef void RCUCBFunc(struct rcu_head *head); + +struct rcu_head { + struct rcu_head *next; + RCUCBFunc *func; +}; + +extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); + +/* The operands of the minus operator must have the same type, + * which must be the one that we specify in the cast. + */ +#define call_rcu(head, func, field) \ + call_rcu1(({ \ + char __attribute__((unused)) \ + offset_must_be_zero[-offsetof(typeof(*(head)), field)], \ + func_type_invalid = (func) - (void (*)(typeof(head)))(func); \ + &(head)->field; \ + }), \ + (RCUCBFunc *)(func)) + +#ifdef __cplusplus +} +#endif + +#endif /* QEMU_RCU_H */ diff --git a/include/qemu/thread.h b/include/qemu/thread.h index e89fdc9785..5114ec8e79 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -25,9 +25,6 @@ void qemu_mutex_lock(QemuMutex *mutex); int qemu_mutex_trylock(QemuMutex *mutex); void qemu_mutex_unlock(QemuMutex *mutex); -#define rcu_read_lock() do { } while (0) -#define rcu_read_unlock() do { } while (0) - void qemu_cond_init(QemuCond *cond); void qemu_cond_destroy(QemuCond *cond); diff --git a/include/qemu/timer.h b/include/qemu/timer.h index ca5befba0e..eba8b2109c 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -838,7 +838,6 @@ static inline int64_t get_clock(void) int64_t cpu_get_icount_raw(void); int64_t cpu_get_icount(void); int64_t cpu_get_clock(void); -int64_t cpu_get_clock_offset(void); int64_t cpu_icount_to_ns(int64_t icount); /*******************************************/ diff --git a/include/qjson.h b/include/qjson.h new file mode 100644 index 0000000000..7c54fdf0ac --- /dev/null +++ b/include/qjson.h @@ -0,0 +1,29 @@ +/* + * QEMU JSON writer + * + * Copyright Alexander Graf + * + * Authors: + * Alexander Graf <agraf@suse.de> + * + * This work is licensed under the terms of the GNU 
LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ +#ifndef QEMU_QJSON_H +#define QEMU_QJSON_H + +#define TYPE_QJSON "QJSON" +typedef struct QJSON QJSON; + +QJSON *qjson_new(void); +void json_prop_str(QJSON *json, const char *name, const char *str); +void json_prop_int(QJSON *json, const char *name, int64_t val); +void json_end_array(QJSON *json); +void json_start_array(QJSON *json, const char *name); +void json_end_object(QJSON *json); +void json_start_object(QJSON *json, const char *name); +const char *qjson_get_str(QJSON *json); +void qjson_finish(QJSON *json); + +#endif /* QEMU_QJSON_H */ diff --git a/include/ui/qemu-spice.h b/include/ui/qemu-spice.h index a93b4b2572..762e063125 100644 --- a/include/ui/qemu-spice.h +++ b/include/ui/qemu-spice.h @@ -88,4 +88,14 @@ static inline int qemu_spice_display_add_client(int csock, int skipauth, #endif /* CONFIG_SPICE */ +static inline bool qemu_using_spice(Error **errp) +{ + if (!using_spice) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, + "SPICE is not in use"); + return false; + } + return true; +} + #endif /* QEMU_SPICE_H */ @@ -33,26 +33,12 @@ static bool memory_region_update_pending; static bool ioeventfd_update_pending; static bool global_dirty_log = false; -/* flat_view_mutex is taken around reading as->current_map; the critical - * section is extremely short, so I'm using a single mutex for every AS. - * We could also RCU for the read-side. - * - * The BQL is taken around transaction commits, hence both locks are taken - * while writing to as->current_map (with the BQL taken outside). 
- */ -static QemuMutex flat_view_mutex; - static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners = QTAILQ_HEAD_INITIALIZER(memory_listeners); static QTAILQ_HEAD(, AddressSpace) address_spaces = QTAILQ_HEAD_INITIALIZER(address_spaces); -static void memory_init(void) -{ - qemu_mutex_init(&flat_view_mutex); -} - typedef struct AddrRange AddrRange; /* @@ -242,6 +228,7 @@ struct FlatRange { * order. */ struct FlatView { + struct rcu_head rcu; unsigned ref; FlatRange *ranges; unsigned nr; @@ -654,10 +641,10 @@ static FlatView *address_space_get_flatview(AddressSpace *as) { FlatView *view; - qemu_mutex_lock(&flat_view_mutex); - view = as->current_map; + rcu_read_lock(); + view = atomic_rcu_read(&as->current_map); flatview_ref(view); - qemu_mutex_unlock(&flat_view_mutex); + rcu_read_unlock(); return view; } @@ -766,10 +753,9 @@ static void address_space_update_topology(AddressSpace *as) address_space_update_topology_pass(as, old_view, new_view, false); address_space_update_topology_pass(as, old_view, new_view, true); - qemu_mutex_lock(&flat_view_mutex); - flatview_unref(as->current_map); - as->current_map = new_view; - qemu_mutex_unlock(&flat_view_mutex); + /* Writes are protected by the BQL. */ + atomic_rcu_set(&as->current_map, new_view); + call_rcu(old_view, flatview_unref, rcu); /* Note that all the old MemoryRegions are still alive up to this * point. 
This relieves most MemoryListeners from the need to @@ -1263,7 +1249,6 @@ static void memory_region_finalize(Object *obj) MemoryRegion *mr = MEMORY_REGION(obj); assert(QTAILQ_EMPTY(&mr->subregions)); - assert(memory_region_transaction_depth == 0); mr->destructor(mr); memory_region_clear_coalescing(mr); g_free((char *)mr->name); @@ -1843,11 +1828,11 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, } range = addrrange_make(int128_make64(addr), int128_make64(size)); - view = address_space_get_flatview(as); + rcu_read_lock(); + view = atomic_rcu_read(&as->current_map); fr = flatview_lookup(view, range); if (!fr) { - flatview_unref(view); - return ret; + goto out; } while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) { @@ -1864,8 +1849,8 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, ret.offset_within_address_space = int128_get64(range.start); ret.readonly = fr->readonly; memory_region_ref(ret.mr); - - flatview_unref(view); +out: + rcu_read_unlock(); return ret; } @@ -1958,10 +1943,6 @@ void memory_listener_unregister(MemoryListener *listener) void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) { - if (QTAILQ_EMPTY(&address_spaces)) { - memory_init(); - } - memory_region_transaction_begin(); as->root = root; as->current_map = g_new(FlatView, 1); @@ -1975,15 +1956,10 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) memory_region_transaction_commit(); } -void address_space_destroy(AddressSpace *as) +static void do_address_space_destroy(AddressSpace *as) { MemoryListener *listener; - /* Flush out anything from MemoryListeners listening in on this */ - memory_region_transaction_begin(); - as->root = NULL; - memory_region_transaction_commit(); - QTAILQ_REMOVE(&address_spaces, as, address_spaces_link); address_space_destroy_dispatch(as); QTAILQ_FOREACH(listener, &memory_listeners, link) { @@ -1995,6 +1971,21 @@ void address_space_destroy(AddressSpace *as) 
g_free(as->ioeventfds); } +void address_space_destroy(AddressSpace *as) +{ + /* Flush out anything from MemoryListeners listening in on this */ + memory_region_transaction_begin(); + as->root = NULL; + memory_region_transaction_commit(); + QTAILQ_REMOVE(&address_spaces, as, address_spaces_link); + + /* At this point, as->dispatch and as->current_map are dummy + * entries that the guest should never use. Wait for the old + * values to expire before freeing the data. + */ + call_rcu(as, do_address_space_destroy, rcu); +} + bool io_mem_read(MemoryRegion *mr, hwaddr addr, uint64_t *pval, unsigned size) { return memory_region_dispatch_read(mr, addr, pval, size); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index edc283073a..e66e55712f 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -452,6 +452,22 @@ int qemu_get_byte(QEMUFile *f) return result; } +int64_t qemu_ftell_fast(QEMUFile *f) +{ + int64_t ret = f->pos; + int i; + + if (f->ops->writev_buffer) { + for (i = 0; i < f->iovcnt; i++) { + ret += f->iov[i].iov_len; + } + } else { + ret += f->buf_index; + } + + return ret; +} + int64_t qemu_ftell(QEMUFile *f) { qemu_fflush(f); diff --git a/migration/rdma.c b/migration/rdma.c index b32dbdfccd..fc351eabf2 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -26,34 +26,7 @@ #include <arpa/inet.h> #include <string.h> #include <rdma/rdma_cma.h> - -//#define DEBUG_RDMA -//#define DEBUG_RDMA_VERBOSE -//#define DEBUG_RDMA_REALLY_VERBOSE - -#ifdef DEBUG_RDMA -#define DPRINTF(fmt, ...) \ - do { printf("rdma: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif - -#ifdef DEBUG_RDMA_VERBOSE -#define DDPRINTF(fmt, ...) \ - do { printf("rdma: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DDPRINTF(fmt, ...) \ - do { } while (0) -#endif - -#ifdef DEBUG_RDMA_REALLY_VERBOSE -#define DDDPRINTF(fmt, ...) \ - do { printf("rdma: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DDDPRINTF(fmt, ...) 
\ - do { } while (0) -#endif +#include "trace.h" /* * Print and error on both the Monitor and the Log file. @@ -104,8 +77,8 @@ static uint32_t known_capabilities = RDMA_CAPABILITY_PIN_ALL; do { \ if (rdma->error_state) { \ if (!rdma->error_reported) { \ - fprintf(stderr, "RDMA is in an error state waiting migration" \ - " to abort!\n"); \ + error_report("RDMA is in an error state waiting migration" \ + " to abort!"); \ rdma->error_reported = 1; \ } \ return rdma->error_state; \ @@ -578,12 +551,13 @@ static int __qemu_rdma_add_block(RDMAContext *rdma, void *host_addr, g_hash_table_insert(rdma->blockmap, (void *) block_offset, block); - DDPRINTF("Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 - " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d\n", - local->nb_blocks, (uint64_t) block->local_host_addr, block->offset, - block->length, (uint64_t) (block->local_host_addr + block->length), - BITS_TO_LONGS(block->nb_chunks) * - sizeof(unsigned long) * 8, block->nb_chunks); + trace___qemu_rdma_add_block(local->nb_blocks, + (uint64_t) block->local_host_addr, block->offset, + block->length, + (uint64_t) (block->local_host_addr + block->length), + BITS_TO_LONGS(block->nb_chunks) * + sizeof(unsigned long) * 8, + block->nb_chunks); local->nb_blocks++; @@ -614,7 +588,7 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal); memset(local, 0, sizeof *local); qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma); - DPRINTF("Allocated %d local ram block structures\n", local->nb_blocks); + trace_qemu_rdma_init_ram_blocks(local->nb_blocks); rdma->block = (RDMARemoteBlock *) g_malloc0(sizeof(RDMARemoteBlock) * rdma->local_ram_blocks.nb_blocks); local->init = true; @@ -683,12 +657,12 @@ static int __qemu_rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) local->block = NULL; } - DDPRINTF("Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 - " length: %" PRIu64 " end: %" PRIu64 " 
bits %" PRIu64 " chunks %d\n", - local->nb_blocks, (uint64_t) block->local_host_addr, block->offset, - block->length, (uint64_t) (block->local_host_addr + block->length), - BITS_TO_LONGS(block->nb_chunks) * - sizeof(unsigned long) * 8, block->nb_chunks); + trace___qemu_rdma_delete_block(local->nb_blocks, + (uint64_t)block->local_host_addr, + block->offset, block->length, + (uint64_t)(block->local_host_addr + block->length), + BITS_TO_LONGS(block->nb_chunks) * + sizeof(unsigned long) * 8, block->nb_chunks); g_free(old); @@ -713,7 +687,7 @@ static void qemu_rdma_dump_id(const char *who, struct ibv_context *verbs) struct ibv_port_attr port; if (ibv_query_port(verbs, 1, &port)) { - fprintf(stderr, "FAILED TO QUERY PORT INFORMATION!\n"); + error_report("Failed to query port information"); return; } @@ -744,7 +718,7 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id) char dgid[33]; inet_ntop(AF_INET6, &id->route.addr.addr.ibaddr.sgid, sgid, sizeof sgid); inet_ntop(AF_INET6, &id->route.addr.addr.ibaddr.dgid, dgid, sizeof dgid); - DPRINTF("%s Source GID: %s, Dest GID: %s\n", who, sgid, dgid); + trace_qemu_rdma_dump_gid(who, sgid, dgid); } /* @@ -918,7 +892,7 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp) for (e = res; e != NULL; e = e->ai_next) { inet_ntop(e->ai_family, &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip); - DPRINTF("Trying %s => %s\n", rdma->host, ip); + trace_qemu_rdma_resolve_host_trying(rdma->host, ip); ret = rdma_resolve_addr(rdma->cm_id, NULL, e->ai_dst_addr, RDMA_RESOLVE_TIMEOUT_MS); @@ -997,14 +971,14 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma) /* allocate pd */ rdma->pd = ibv_alloc_pd(rdma->verbs); if (!rdma->pd) { - fprintf(stderr, "failed to allocate protection domain\n"); + error_report("failed to allocate protection domain"); return -1; } /* create completion channel */ rdma->comp_channel = ibv_create_comp_channel(rdma->verbs); if (!rdma->comp_channel) { - fprintf(stderr, 
"failed to allocate completion channel\n"); + error_report("failed to allocate completion channel"); goto err_alloc_pd_cq; } @@ -1015,7 +989,7 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma) rdma->cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3), NULL, rdma->comp_channel, 0); if (!rdma->cq) { - fprintf(stderr, "failed to allocate completion queue\n"); + error_report("failed to allocate completion queue"); goto err_alloc_pd_cq; } @@ -1160,8 +1134,7 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma, if (!block->pmr[chunk]) { uint64_t len = chunk_end - chunk_start; - DDPRINTF("Registering %" PRIu64 " bytes @ %p\n", - len, chunk_start); + trace_qemu_rdma_register_and_get_keys(len, chunk_start); block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, @@ -1204,7 +1177,7 @@ static int qemu_rdma_reg_control(RDMAContext *rdma, int idx) rdma->total_registrations++; return 0; } - fprintf(stderr, "qemu_rdma_reg_control failed!\n"); + error_report("qemu_rdma_reg_control failed"); return -1; } @@ -1270,8 +1243,8 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) .repeat = 1, }; - DDPRINTF("Processing unregister for chunk: %" PRIu64 - " at position %d\n", chunk, rdma->unregister_current); + trace_qemu_rdma_unregister_waiting_proc(chunk, + rdma->unregister_current); rdma->unregistrations[rdma->unregister_current] = 0; rdma->unregister_current++; @@ -1291,11 +1264,11 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) clear_bit(chunk, block->unregister_bitmap); if (test_bit(chunk, block->transit_bitmap)) { - DDPRINTF("Cannot unregister inflight chunk: %" PRIu64 "\n", chunk); + trace_qemu_rdma_unregister_waiting_inflight(chunk); continue; } - DDPRINTF("Sending unregister for chunk: %" PRIu64 "\n", chunk); + trace_qemu_rdma_unregister_waiting_send(chunk); ret = ibv_dereg_mr(block->pmr[chunk]); block->pmr[chunk] = NULL; @@ -1315,7 +1288,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) return ret; } - 
DDPRINTF("Unregister for chunk: %" PRIu64 " complete.\n", chunk); + trace_qemu_rdma_unregister_waiting_complete(chunk); } return 0; @@ -1340,13 +1313,13 @@ static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index, uint64_t chunk, uint64_t wr_id) { if (rdma->unregistrations[rdma->unregister_next] != 0) { - fprintf(stderr, "rdma migration: queue is full!\n"); + error_report("rdma migration: queue is full"); } else { RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]); if (!test_and_set_bit(chunk, block->unregister_bitmap)) { - DDPRINTF("Appending unregister chunk %" PRIu64 - " at position %d\n", chunk, rdma->unregister_next); + trace_qemu_rdma_signal_unregister_append(chunk, + rdma->unregister_next); rdma->unregistrations[rdma->unregister_next++] = qemu_rdma_make_wrid(wr_id, index, chunk); @@ -1355,8 +1328,7 @@ static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index, rdma->unregister_next = 0; } } else { - DDPRINTF("Unregister chunk %" PRIu64 " already in queue.\n", - chunk); + trace_qemu_rdma_signal_unregister_already(chunk); } } } @@ -1381,7 +1353,7 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out, } if (ret < 0) { - fprintf(stderr, "ibv_poll_cq return %d!\n", ret); + error_report("ibv_poll_cq return %d", ret); return ret; } @@ -1397,8 +1369,7 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out, if (rdma->control_ready_expected && (wr_id >= RDMA_WRID_RECV_CONTROL)) { - DDDPRINTF("completion %s #%" PRId64 " received (%" PRId64 ")" - " left %d\n", wrid_desc[RDMA_WRID_RECV_CONTROL], + trace_qemu_rdma_poll_recv(wrid_desc[RDMA_WRID_RECV_CONTROL], wr_id - RDMA_WRID_RECV_CONTROL, wr_id, rdma->nb_sent); rdma->control_ready_expected = 0; } @@ -1410,9 +1381,8 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out, (wc.wr_id & RDMA_WRID_BLOCK_MASK) >> RDMA_WRID_BLOCK_SHIFT; RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]); - DDDPRINTF("completions %s (%" 
PRId64 ") left %d, " - "block %" PRIu64 ", chunk: %" PRIu64 " %p %p\n", - print_wrid(wr_id), wr_id, rdma->nb_sent, index, chunk, + trace_qemu_rdma_poll_write(print_wrid(wr_id), wr_id, rdma->nb_sent, + index, chunk, block->local_host_addr, (void *)block->remote_host_addr); clear_bit(chunk, block->transit_bitmap); @@ -1433,8 +1403,7 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out, #endif } } else { - DDDPRINTF("other completion %s (%" PRId64 ") received left %d\n", - print_wrid(wr_id), wr_id, rdma->nb_sent); + trace_qemu_rdma_poll_other(print_wrid(wr_id), wr_id, rdma->nb_sent); } *wr_id_out = wc.wr_id; @@ -1482,9 +1451,8 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, break; } if (wr_id != wrid_requested) { - DDDPRINTF("A Wanted wrid %s (%d) but got %s (%" PRIu64 ")\n", - print_wrid(wrid_requested), - wrid_requested, print_wrid(wr_id), wr_id); + trace_qemu_rdma_block_for_wrid_miss(print_wrid(wrid_requested), + wrid_requested, print_wrid(wr_id), wr_id); } } @@ -1524,9 +1492,8 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, break; } if (wr_id != wrid_requested) { - DDDPRINTF("B Wanted wrid %s (%d) but got %s (%" PRIu64 ")\n", - print_wrid(wrid_requested), wrid_requested, - print_wrid(wr_id), wr_id); + trace_qemu_rdma_block_for_wrid_miss(print_wrid(wrid_requested), + wrid_requested, print_wrid(wr_id), wr_id); } } @@ -1571,7 +1538,7 @@ static int qemu_rdma_post_send_control(RDMAContext *rdma, uint8_t *buf, .num_sge = 1, }; - DDDPRINTF("CONTROL: sending %s..\n", control_desc[head->type]); + trace_qemu_rdma_post_send_control(control_desc[head->type]); /* * We don't actually need to do a memcpy() in here if we used @@ -1593,13 +1560,13 @@ static int qemu_rdma_post_send_control(RDMAContext *rdma, uint8_t *buf, ret = ibv_post_send(rdma->qp, &send_wr, &bad_wr); if (ret > 0) { - fprintf(stderr, "Failed to use post IB SEND for control!\n"); + error_report("Failed to use post IB SEND for 
control"); return -ret; } ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_SEND_CONTROL, NULL); if (ret < 0) { - fprintf(stderr, "rdma migration: send polling control error!\n"); + error_report("rdma migration: send polling control error"); } return ret; @@ -1643,32 +1610,31 @@ static int qemu_rdma_exchange_get_response(RDMAContext *rdma, &byte_len); if (ret < 0) { - fprintf(stderr, "rdma migration: recv polling control error!\n"); + error_report("rdma migration: recv polling control error!"); return ret; } network_to_control((void *) rdma->wr_data[idx].control); memcpy(head, rdma->wr_data[idx].control, sizeof(RDMAControlHeader)); - DDDPRINTF("CONTROL: %s receiving...\n", control_desc[expecting]); + trace_qemu_rdma_exchange_get_response_start(control_desc[expecting]); if (expecting == RDMA_CONTROL_NONE) { - DDDPRINTF("Surprise: got %s (%d)\n", - control_desc[head->type], head->type); + trace_qemu_rdma_exchange_get_response_none(control_desc[head->type], + head->type); } else if (head->type != expecting || head->type == RDMA_CONTROL_ERROR) { - fprintf(stderr, "Was expecting a %s (%d) control message" - ", but got: %s (%d), length: %d\n", + error_report("Was expecting a %s (%d) control message" + ", but got: %s (%d), length: %d", control_desc[expecting], expecting, control_desc[head->type], head->type, head->len); return -EIO; } if (head->len > RDMA_CONTROL_MAX_BUFFER - sizeof(*head)) { - fprintf(stderr, "too long length: %d\n", head->len); + error_report("too long length: %d\n", head->len); return -EINVAL; } if (sizeof(*head) + head->len != byte_len) { - fprintf(stderr, "Malformed length: %d byte_len %d\n", - head->len, byte_len); + error_report("Malformed length: %d byte_len %d", head->len, byte_len); return -EINVAL; } @@ -1730,7 +1696,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, if (resp) { ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_DATA); if (ret) { - fprintf(stderr, "rdma migration: error posting" + error_report("rdma 
migration: error posting" " extra control recv for anticipated result!"); return ret; } @@ -1741,7 +1707,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, */ ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); if (ret) { - fprintf(stderr, "rdma migration: error posting first control recv!"); + error_report("rdma migration: error posting first control recv!"); return ret; } @@ -1751,7 +1717,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, ret = qemu_rdma_post_send_control(rdma, data, head); if (ret < 0) { - fprintf(stderr, "Failed to send control buffer!\n"); + error_report("Failed to send control buffer!"); return ret; } @@ -1760,14 +1726,14 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, */ if (resp) { if (callback) { - DDPRINTF("Issuing callback before receiving response...\n"); + trace_qemu_rdma_exchange_send_issue_callback(); ret = callback(rdma); if (ret < 0) { return ret; } } - DDPRINTF("Waiting for response %s\n", control_desc[resp->type]); + trace_qemu_rdma_exchange_send_waiting(control_desc[resp->type]); ret = qemu_rdma_exchange_get_response(rdma, resp, resp->type, RDMA_WRID_DATA); @@ -1779,7 +1745,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, if (resp_idx) { *resp_idx = RDMA_WRID_DATA; } - DDPRINTF("Response %s received.\n", control_desc[resp->type]); + trace_qemu_rdma_exchange_send_received(control_desc[resp->type]); } rdma->control_ready_expected = 1; @@ -1807,7 +1773,7 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head, ret = qemu_rdma_post_send_control(rdma, NULL, &ready); if (ret < 0) { - fprintf(stderr, "Failed to send control buffer!\n"); + error_report("Failed to send control buffer!"); return ret; } @@ -1828,7 +1794,7 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head, */ ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); if (ret) { - 
fprintf(stderr, "rdma migration: error posting second control recv!"); + error_report("rdma migration: error posting second control recv!"); return ret; } @@ -1882,8 +1848,9 @@ retry: } } - DDPRINTF("Writing %" PRIu64 " chunks, (%" PRIu64 " MB)\n", - chunks + 1, (chunks + 1) * (1UL << RDMA_REG_CHUNK_SHIFT) / 1024 / 1024); + trace_qemu_rdma_write_one_top(chunks + 1, + (chunks + 1) * + (1UL << RDMA_REG_CHUNK_SHIFT) / 1024 / 1024); chunk_end = ram_chunk_end(block, chunk + chunks); @@ -1895,17 +1862,15 @@ retry: while (test_bit(chunk, block->transit_bitmap)) { (void)count; - DDPRINTF("(%d) Not clobbering: block: %d chunk %" PRIu64 - " current %" PRIu64 " len %" PRIu64 " %d %d\n", - count++, current_index, chunk, + trace_qemu_rdma_write_one_block(count++, current_index, chunk, sge.addr, length, rdma->nb_sent, block->nb_chunks); ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL); if (ret < 0) { - fprintf(stderr, "Failed to Wait for previous write to complete " + error_report("Failed to Wait for previous write to complete " "block %d chunk %" PRIu64 - " current %" PRIu64 " len %" PRIu64 " %d\n", + " current %" PRIu64 " len %" PRIu64 " %d", current_index, chunk, sge.addr, length, rdma->nb_sent); return ret; } @@ -1932,10 +1897,8 @@ retry: head.len = sizeof(comp); head.type = RDMA_CONTROL_COMPRESS; - DDPRINTF("Entire chunk is zero, sending compress: %" - PRIu64 " for %d " - "bytes, index: %d, offset: %" PRId64 "...\n", - chunk, sge.length, current_index, current_addr); + trace_qemu_rdma_write_one_zero(chunk, sge.length, + current_index, current_addr); compress_to_network(&comp); ret = qemu_rdma_exchange_send(rdma, &head, @@ -1961,9 +1924,8 @@ retry: } reg.chunks = chunks; - DDPRINTF("Sending registration request chunk %" PRIu64 " for %d " - "bytes, index: %d, offset: %" PRId64 "...\n", - chunk, sge.length, current_index, current_addr); + trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index, + current_addr); register_to_network(®); ret = 
qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®, @@ -1977,7 +1939,7 @@ retry: (uint8_t *) sge.addr, &sge.lkey, NULL, chunk, chunk_start, chunk_end)) { - fprintf(stderr, "cannot get lkey!\n"); + error_report("cannot get lkey"); return -EINVAL; } @@ -1986,9 +1948,8 @@ retry: network_to_result(reg_result); - DDPRINTF("Received registration result:" - " my key: %x their key %x, chunk %" PRIu64 "\n", - block->remote_keys[chunk], reg_result->rkey, chunk); + trace_qemu_rdma_write_one_recvregres(block->remote_keys[chunk], + reg_result->rkey, chunk); block->remote_keys[chunk] = reg_result->rkey; block->remote_host_addr = reg_result->host_addr; @@ -1998,7 +1959,7 @@ retry: (uint8_t *)sge.addr, &sge.lkey, NULL, chunk, chunk_start, chunk_end)) { - fprintf(stderr, "cannot get lkey!\n"); + error_report("cannot get lkey!"); return -EINVAL; } } @@ -2010,7 +1971,7 @@ retry: if (qemu_rdma_register_and_get_keys(rdma, block, (uint8_t *)sge.addr, &sge.lkey, NULL, chunk, chunk_start, chunk_end)) { - fprintf(stderr, "cannot get lkey!\n"); + error_report("cannot get lkey!"); return -EINVAL; } } @@ -2031,10 +1992,8 @@ retry: send_wr.wr.rdma.remote_addr = block->remote_host_addr + (current_addr - block->offset); - DDDPRINTF("Posting chunk: %" PRIu64 ", addr: %lx" - " remote: %lx, bytes %" PRIu32 "\n", - chunk, sge.addr, send_wr.wr.rdma.remote_addr, - sge.length); + trace_qemu_rdma_write_one_post(chunk, sge.addr, send_wr.wr.rdma.remote_addr, + sge.length); /* * ibv_post_send() does not return negative error numbers, @@ -2043,11 +2002,11 @@ retry: ret = ibv_post_send(rdma->qp, &send_wr, &bad_wr); if (ret == ENOMEM) { - DDPRINTF("send queue is full. wait a little....\n"); + trace_qemu_rdma_write_one_queue_full(); ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL); if (ret < 0) { - fprintf(stderr, "rdma migration: failed to make " - "room in full send queue! %d\n", ret); + error_report("rdma migration: failed to make " + "room in full send queue! 
%d", ret); return ret; } @@ -2088,7 +2047,7 @@ static int qemu_rdma_write_flush(QEMUFile *f, RDMAContext *rdma) if (ret == 0) { rdma->nb_sent++; - DDDPRINTF("sent total: %d\n", rdma->nb_sent); + trace_qemu_rdma_write_flush(rdma->nb_sent); } rdma->current_length = 0; @@ -2173,7 +2132,7 @@ static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma, ret = qemu_rdma_search_ram_block(rdma, block_offset, offset, len, &index, &chunk); if (ret) { - fprintf(stderr, "ram block search failed\n"); + error_report("ram block search failed"); return ret; } rdma->current_index = index; @@ -2202,19 +2161,19 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) .type = RDMA_CONTROL_ERROR, .repeat = 1, }; - fprintf(stderr, "Early error. Sending error.\n"); + error_report("Early error. Sending error."); qemu_rdma_post_send_control(rdma, NULL, &head); } ret = rdma_disconnect(rdma->cm_id); if (!ret) { - DDPRINTF("waiting for disconnect\n"); + trace_qemu_rdma_cleanup_waiting_for_disconnect(); ret = rdma_get_cm_event(rdma->channel, &cm_event); if (!ret) { rdma_ack_cm_event(cm_event); } } - DDPRINTF("Disconnected.\n"); + trace_qemu_rdma_cleanup_disconnect(); rdma->connected = false; } @@ -2341,7 +2300,7 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) * on the source first requested the capability. */ if (rdma->pin_all) { - DPRINTF("Server pin-all memory requested.\n"); + trace_qemu_rdma_connect_pin_all_requested(); cap.flags |= RDMA_CAPABILITY_PIN_ALL; } @@ -2389,7 +2348,7 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) rdma->pin_all = false; } - DPRINTF("Pin all memory: %s\n", rdma->pin_all ? 
"enabled" : "disabled"); + trace_qemu_rdma_connect_pin_all_outcome(rdma->pin_all); rdma_ack_cm_event(cm_event); @@ -2456,7 +2415,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp) for (e = res; e != NULL; e = e->ai_next) { inet_ntop(e->ai_family, &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip); - DPRINTF("Trying %s => %s\n", rdma->host, ip); + trace_qemu_rdma_dest_init_trying(rdma->host, ip); ret = rdma_bind_addr(listen_id, e->ai_dst_addr); if (!ret) { if (e->ai_family == AF_INET6) { @@ -2575,8 +2534,7 @@ static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf, size_t len = 0; if (rdma->wr_data[idx].control_len) { - DDDPRINTF("RDMA %" PRId64 " of %d bytes already in buffer\n", - rdma->wr_data[idx].control_len, size); + trace_qemu_rdma_fill(rdma->wr_data[idx].control_len, size); len = MIN(size, rdma->wr_data[idx].control_len); memcpy(buf, rdma->wr_data[idx].control_curr, len); @@ -2643,7 +2601,7 @@ static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma) while (rdma->nb_sent) { ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL); if (ret < 0) { - fprintf(stderr, "rdma migration: complete polling error!\n"); + error_report("rdma migration: complete polling error!"); return -EIO; } } @@ -2655,7 +2613,7 @@ static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma) static int qemu_rdma_close(void *opaque) { - DPRINTF("Shutting down connection.\n"); + trace_qemu_rdma_close(); QEMUFileRDMA *r = opaque; if (r->rdma) { qemu_rdma_cleanup(r->rdma); @@ -2719,7 +2677,7 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, */ ret = qemu_rdma_write(f, rdma, block_offset, offset, size); if (ret < 0) { - fprintf(stderr, "rdma migration: write error! %d\n", ret); + error_report("rdma migration: write error! 
%d", ret); goto err; } @@ -2752,7 +2710,7 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, offset, size, &index, &chunk); if (ret) { - fprintf(stderr, "ram block search failed\n"); + error_report("ram block search failed"); goto err; } @@ -2779,7 +2737,7 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, uint64_t wr_id, wr_id_in; int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL); if (ret < 0) { - fprintf(stderr, "rdma migration: polling error! %d\n", ret); + error_report("rdma migration: polling error! %d", ret); goto err; } @@ -2824,7 +2782,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) network_to_caps(&cap); if (cap.version < 1 || cap.version > RDMA_CONTROL_VERSION_CURRENT) { - fprintf(stderr, "Unknown source RDMA version: %d, bailing...\n", + error_report("Unknown source RDMA version: %d, bailing...", cap.version); rdma_ack_cm_event(cm_event); goto err_rdma_dest_wait; @@ -2848,17 +2806,17 @@ static int qemu_rdma_accept(RDMAContext *rdma) rdma_ack_cm_event(cm_event); - DPRINTF("Memory pin all: %s\n", rdma->pin_all ? 
"enabled" : "disabled"); + trace_qemu_rdma_accept_pin_state(rdma->pin_all); caps_to_network(&cap); - DPRINTF("verbs context after listen: %p\n", verbs); + trace_qemu_rdma_accept_pin_verbsc(verbs); if (!rdma->verbs) { rdma->verbs = verbs; } else if (rdma->verbs != verbs) { - fprintf(stderr, "ibv context not matching %p, %p!\n", - rdma->verbs, verbs); + error_report("ibv context not matching %p, %p!", rdma->verbs, + verbs); goto err_rdma_dest_wait; } @@ -2866,26 +2824,26 @@ static int qemu_rdma_accept(RDMAContext *rdma) ret = qemu_rdma_alloc_pd_cq(rdma); if (ret) { - fprintf(stderr, "rdma migration: error allocating pd and cq!\n"); + error_report("rdma migration: error allocating pd and cq!"); goto err_rdma_dest_wait; } ret = qemu_rdma_alloc_qp(rdma); if (ret) { - fprintf(stderr, "rdma migration: error allocating qp!\n"); + error_report("rdma migration: error allocating qp!"); goto err_rdma_dest_wait; } ret = qemu_rdma_init_ram_blocks(rdma); if (ret) { - fprintf(stderr, "rdma migration: error initializing ram blocks!\n"); + error_report("rdma migration: error initializing ram blocks!"); goto err_rdma_dest_wait; } for (idx = 0; idx < RDMA_WRID_MAX; idx++) { ret = qemu_rdma_reg_control(rdma, idx); if (ret) { - fprintf(stderr, "rdma: error registering %d control!\n", idx); + error_report("rdma: error registering %d control", idx); goto err_rdma_dest_wait; } } @@ -2894,18 +2852,18 @@ static int qemu_rdma_accept(RDMAContext *rdma) ret = rdma_accept(rdma->cm_id, &conn_param); if (ret) { - fprintf(stderr, "rdma_accept returns %d!\n", ret); + error_report("rdma_accept returns %d", ret); goto err_rdma_dest_wait; } ret = rdma_get_cm_event(rdma->channel, &cm_event); if (ret) { - fprintf(stderr, "rdma_accept get_cm_event failed %d!\n", ret); + error_report("rdma_accept get_cm_event failed %d", ret); goto err_rdma_dest_wait; } if (cm_event->event != RDMA_CM_EVENT_ESTABLISHED) { - fprintf(stderr, "rdma_accept not event established!\n"); + error_report("rdma_accept not event 
established"); rdma_ack_cm_event(cm_event); goto err_rdma_dest_wait; } @@ -2915,7 +2873,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); if (ret) { - fprintf(stderr, "rdma migration: error posting second control recv!\n"); + error_report("rdma migration: error posting second control recv"); goto err_rdma_dest_wait; } @@ -2969,7 +2927,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, CHECK_ERROR_STATE(); do { - DDDPRINTF("Waiting for next request %" PRIu64 "...\n", flags); + trace_qemu_rdma_registration_handle_wait(flags); ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE); @@ -2978,8 +2936,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, } if (head.repeat > RDMA_CONTROL_MAX_COMMANDS_PER_MESSAGE) { - fprintf(stderr, "rdma: Too many requests in this message (%d)." - "Bailing.\n", head.repeat); + error_report("rdma: Too many requests in this message (%d)." + "Bailing.", head.repeat); ret = -EIO; break; } @@ -2989,9 +2947,9 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, comp = (RDMACompress *) rdma->wr_data[idx].control_curr; network_to_compress(comp); - DDPRINTF("Zapping zero chunk: %" PRId64 - " bytes, index %d, offset %" PRId64 "\n", - comp->length, comp->block_idx, comp->offset); + trace_qemu_rdma_registration_handle_compress(comp->length, + comp->block_idx, + comp->offset); block = &(rdma->local_ram_blocks.block[comp->block_idx]); host_addr = block->local_host_addr + @@ -3001,17 +2959,17 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, break; case RDMA_CONTROL_REGISTER_FINISHED: - DDDPRINTF("Current registrations complete.\n"); + trace_qemu_rdma_registration_handle_finished(); goto out; case RDMA_CONTROL_RAM_BLOCKS_REQUEST: - DPRINTF("Initial setup info requested.\n"); + trace_qemu_rdma_registration_handle_ram_blocks(); if (rdma->pin_all) { ret = qemu_rdma_reg_whole_ram_blocks(rdma); if (ret) { - 
fprintf(stderr, "rdma migration: error dest " - "registering ram blocks!\n"); + error_report("rdma migration: error dest " + "registering ram blocks"); goto out; } } @@ -3044,13 +3002,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, (uint8_t *) rdma->block, &blocks); if (ret < 0) { - fprintf(stderr, "rdma migration: error sending remote info!\n"); + error_report("rdma migration: error sending remote info"); goto out; } break; case RDMA_CONTROL_REGISTER_REQUEST: - DDPRINTF("There are %d registration requests\n", head.repeat); + trace_qemu_rdma_registration_handle_register(head.repeat); reg_resp.repeat = head.repeat; registers = (RDMARegister *) rdma->wr_data[idx].control_curr; @@ -3064,8 +3022,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, reg_result = &results[count]; - DDPRINTF("Registration request (%d): index %d, current_addr %" - PRIu64 " chunks: %" PRIu64 "\n", count, + trace_qemu_rdma_registration_handle_register_loop(count, reg->current_index, reg->key.current_addr, reg->chunks); block = &(rdma->local_ram_blocks.block[reg->current_index]); @@ -3084,15 +3041,15 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, if (qemu_rdma_register_and_get_keys(rdma, block, (uint8_t *)host_addr, NULL, &reg_result->rkey, chunk, chunk_start, chunk_end)) { - fprintf(stderr, "cannot get rkey!\n"); + error_report("cannot get rkey"); ret = -EINVAL; goto out; } reg_result->host_addr = (uint64_t) block->local_host_addr; - DDPRINTF("Registered rkey for this request: %x\n", - reg_result->rkey); + trace_qemu_rdma_registration_handle_register_rkey( + reg_result->rkey); result_to_network(reg_result); } @@ -3101,12 +3058,12 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, (uint8_t *) results, &reg_resp); if (ret < 0) { - fprintf(stderr, "Failed to send control buffer!\n"); + error_report("Failed to send control buffer"); goto out; } break; case RDMA_CONTROL_UNREGISTER_REQUEST: - DDPRINTF("There are %d 
unregistration requests\n", head.repeat); + trace_qemu_rdma_registration_handle_unregister(head.repeat); unreg_resp.repeat = head.repeat; registers = (RDMARegister *) rdma->wr_data[idx].control_curr; @@ -3114,9 +3071,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, reg = &registers[count]; network_to_register(reg); - DDPRINTF("Unregistration request (%d): " - " index %d, chunk %" PRIu64 "\n", - count, reg->current_index, reg->key.chunk); + trace_qemu_rdma_registration_handle_unregister_loop(count, + reg->current_index, reg->key.chunk); block = &(rdma->local_ram_blocks.block[reg->current_index]); @@ -3131,24 +3087,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, rdma->total_registrations--; - DDPRINTF("Unregistered chunk %" PRIu64 " successfully.\n", - reg->key.chunk); + trace_qemu_rdma_registration_handle_unregister_success( + reg->key.chunk); } ret = qemu_rdma_post_send_control(rdma, NULL, &unreg_resp); if (ret < 0) { - fprintf(stderr, "Failed to send control buffer!\n"); + error_report("Failed to send control buffer"); goto out; } break; case RDMA_CONTROL_REGISTER_RESULT: - fprintf(stderr, "Invalid RESULT message at dest.\n"); + error_report("Invalid RESULT message at dest."); ret = -EIO; goto out; default: - fprintf(stderr, "Unknown control message %s\n", - control_desc[head.type]); + error_report("Unknown control message %s", control_desc[head.type]); ret = -EIO; goto out; } @@ -3168,7 +3123,7 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, CHECK_ERROR_STATE(); - DDDPRINTF("start section: %" PRIu64 "\n", flags); + trace_qemu_rdma_registration_start(flags); qemu_put_be64(f, RAM_SAVE_FLAG_HOOK); qemu_fflush(f); @@ -3203,7 +3158,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, int reg_result_idx, i, j, nb_remote_blocks; head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST; - DPRINTF("Sending registration setup for ram blocks...\n"); + trace_qemu_rdma_registration_stop_ram(); /* * Make 
sure that we parallelize the pinning on both sides. @@ -3275,7 +3230,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, } } - DDDPRINTF("Sending registration finish %" PRIu64 "...\n", flags); + trace_qemu_rdma_registration_stop(flags); head.type = RDMA_CONTROL_REGISTER_FINISHED; ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL); @@ -3339,7 +3294,7 @@ static void rdma_accept_incoming_migration(void *opaque) QEMUFile *f; Error *local_err = NULL, **errp = &local_err; - DPRINTF("Accepting rdma connection...\n"); + trace_qemu_dma_accept_incoming_migration(); ret = qemu_rdma_accept(rdma); if (ret) { @@ -3347,7 +3302,7 @@ static void rdma_accept_incoming_migration(void *opaque) return; } - DPRINTF("Accepted migration\n"); + trace_qemu_dma_accept_incoming_migration_accepted(); f = qemu_fopen_rdma(rdma, "rb"); if (f == NULL) { @@ -3366,7 +3321,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) RDMAContext *rdma; Error *local_err = NULL; - DPRINTF("Starting RDMA-based incoming migration\n"); + trace_rdma_start_incoming_migration(); rdma = qemu_rdma_data_init(host_port, &local_err); if (rdma == NULL) { @@ -3379,7 +3334,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) goto err; } - DPRINTF("qemu_rdma_dest_init success\n"); + trace_rdma_start_incoming_migration_after_dest_init(); ret = rdma_listen(rdma->listen_id, 5); @@ -3388,7 +3343,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) goto err; } - DPRINTF("rdma_listen success\n"); + trace_rdma_start_incoming_migration_after_rdma_listen(); qemu_set_fd_handler2(rdma->channel->fd, NULL, rdma_accept_incoming_migration, NULL, @@ -3419,14 +3374,14 @@ void rdma_start_outgoing_migration(void *opaque, goto err; } - DPRINTF("qemu_rdma_source_init success\n"); + trace_rdma_start_outgoing_migration_after_rdma_source_init(); ret = qemu_rdma_connect(rdma, &local_err); if (ret) { goto err; } - DPRINTF("qemu_rdma_source_connect 
success\n"); + trace_rdma_start_outgoing_migration_after_rdma_connect(); s->file = qemu_fopen_rdma(rdma, "wb"); migrate_fd_connect(s); diff --git a/migration/vmstate.c b/migration/vmstate.c index 3dde574c0f..e5388f0596 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -3,10 +3,12 @@ #include "migration/qemu-file.h" #include "migration/vmstate.h" #include "qemu/bitops.h" +#include "qemu/error-report.h" #include "trace.h" +#include "qjson.h" static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, - void *opaque); + void *opaque, QJSON *vmdesc); static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, void *opaque); @@ -72,16 +74,21 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, int version_id) { VMStateField *field = vmsd->fields; - int ret; + int ret = 0; + trace_vmstate_load_state(vmsd->name, version_id); if (version_id > vmsd->version_id) { + trace_vmstate_load_state_end(vmsd->name, "too new", -EINVAL); return -EINVAL; } if (version_id < vmsd->minimum_version_id) { if (vmsd->load_state_old && version_id >= vmsd->minimum_version_id_old) { - return vmsd->load_state_old(f, opaque, version_id); + ret = vmsd->load_state_old(f, opaque, version_id); + trace_vmstate_load_state_end(vmsd->name, "old path", ret); + return ret; } + trace_vmstate_load_state_end(vmsd->name, "too old", -EINVAL); return -EINVAL; } if (vmsd->pre_load) { @@ -91,6 +98,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, } } while (field->name) { + trace_vmstate_load_state_field(vmsd->name, field->name); if ((field->field_exists && field->field_exists(opaque, version_id)) || (!field->field_exists && @@ -122,8 +130,8 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, } } } else if (field->flags & VMS_MUST_EXIST) { - fprintf(stderr, "Input validation failed: %s/%s\n", - vmsd->name, field->name); + error_report("Input validation failed: %s/%s", + vmsd->name, 
field->name); return -1; } field++; @@ -133,48 +141,203 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, return ret; } if (vmsd->post_load) { - return vmsd->post_load(opaque, version_id); + ret = vmsd->post_load(opaque, version_id); } - return 0; + trace_vmstate_load_state_end(vmsd->name, "end", ret); + return ret; +} + +static int vmfield_name_num(VMStateField *start, VMStateField *search) +{ + VMStateField *field; + int found = 0; + + for (field = start; field->name; field++) { + if (!strcmp(field->name, search->name)) { + if (field == search) { + return found; + } + found++; + } + } + + return -1; +} + +static bool vmfield_name_is_unique(VMStateField *start, VMStateField *search) +{ + VMStateField *field; + int found = 0; + + for (field = start; field->name; field++) { + if (!strcmp(field->name, search->name)) { + found++; + /* name found more than once, so it's not unique */ + if (found > 1) { + return false; + } + } + } + + return true; +} + +static const char *vmfield_get_type_name(VMStateField *field) +{ + const char *type = "unknown"; + + if (field->flags & VMS_STRUCT) { + type = "struct"; + } else if (field->info->name) { + type = field->info->name; + } + + return type; +} + +static bool vmsd_can_compress(VMStateField *field) +{ + if (field->field_exists) { + /* Dynamically existing fields mess up compression */ + return false; + } + + if (field->flags & VMS_STRUCT) { + VMStateField *sfield = field->vmsd->fields; + while (sfield->name) { + if (!vmsd_can_compress(sfield)) { + /* Child elements can't compress, so can't we */ + return false; + } + sfield++; + } + + if (field->vmsd->subsections) { + /* Subsections may come and go, better don't compress */ + return false; + } + } + + return true; +} + +static void vmsd_desc_field_start(const VMStateDescription *vmsd, QJSON *vmdesc, + VMStateField *field, int i, int max) +{ + char *name, *old_name; + bool is_array = max > 1; + bool can_compress = vmsd_can_compress(field); + + if (!vmdesc) { 
+ return; + } + + name = g_strdup(field->name); + + /* Field name is not unique, need to make it unique */ + if (!vmfield_name_is_unique(vmsd->fields, field)) { + int num = vmfield_name_num(vmsd->fields, field); + old_name = name; + name = g_strdup_printf("%s[%d]", name, num); + g_free(old_name); + } + + json_start_object(vmdesc, NULL); + json_prop_str(vmdesc, "name", name); + if (is_array) { + if (can_compress) { + json_prop_int(vmdesc, "array_len", max); + } else { + json_prop_int(vmdesc, "index", i); + } + } + json_prop_str(vmdesc, "type", vmfield_get_type_name(field)); + + if (field->flags & VMS_STRUCT) { + json_start_object(vmdesc, "struct"); + } + + g_free(name); +} + +static void vmsd_desc_field_end(const VMStateDescription *vmsd, QJSON *vmdesc, + VMStateField *field, size_t size, int i) +{ + if (!vmdesc) { + return; + } + + if (field->flags & VMS_STRUCT) { + /* We printed a struct in between, close its child object */ + json_end_object(vmdesc); + } + + json_prop_int(vmdesc, "size", size); + json_end_object(vmdesc); } void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, - void *opaque) + void *opaque, QJSON *vmdesc) { VMStateField *field = vmsd->fields; if (vmsd->pre_save) { vmsd->pre_save(opaque); } + + if (vmdesc) { + json_prop_str(vmdesc, "vmsd_name", vmsd->name); + json_prop_int(vmdesc, "version", vmsd->version_id); + json_start_array(vmdesc, "fields"); + } + while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { void *base_addr = vmstate_base_addr(opaque, field, false); int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); + int64_t old_offset, written_bytes; + QJSON *vmdesc_loop = vmdesc; for (i = 0; i < n_elems; i++) { void *addr = base_addr + size * i; + vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems); + old_offset = qemu_ftell_fast(f); + if (field->flags & VMS_ARRAY_OF_POINTER) { addr = *(void **)addr; } if (field->flags & VMS_STRUCT) { - 
vmstate_save_state(f, field->vmsd, addr); + vmstate_save_state(f, field->vmsd, addr, vmdesc_loop); } else { field->info->put(f, addr, size); } + + written_bytes = qemu_ftell_fast(f) - old_offset; + vmsd_desc_field_end(vmsd, vmdesc_loop, field, written_bytes, i); + + /* Compressed arrays only care about the first element */ + if (vmdesc_loop && vmsd_can_compress(field)) { + vmdesc_loop = NULL; + } } } else { if (field->flags & VMS_MUST_EXIST) { - fprintf(stderr, "Output state validation failed: %s/%s\n", + error_report("Output state validation failed: %s/%s", vmsd->name, field->name); assert(!(field->flags & VMS_MUST_EXIST)); } } field++; } - vmstate_subsection_save(f, vmsd, opaque); + + if (vmdesc) { + json_end_array(vmdesc); + } + + vmstate_subsection_save(f, vmsd, opaque, vmdesc); } static const VMStateDescription * @@ -192,6 +355,8 @@ static const VMStateDescription * static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, void *opaque) { + trace_vmstate_subsection_load(vmsd->name); + while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) { char idstr[256]; int ret; @@ -201,20 +366,24 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, len = qemu_peek_byte(f, 1); if (len < strlen(vmsd->name) + 1) { /* subsection name has be be "section_name/a" */ + trace_vmstate_subsection_load_bad(vmsd->name, "(short)"); return 0; } size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2); if (size != len) { + trace_vmstate_subsection_load_bad(vmsd->name, "(peek fail)"); return 0; } idstr[size] = 0; if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) { + trace_vmstate_subsection_load_bad(vmsd->name, idstr); /* it don't have a valid subsection name */ return 0; } sub_vmsd = vmstate_get_subsection(vmsd->subsections, idstr); if (sub_vmsd == NULL) { + trace_vmstate_subsection_load_bad(vmsd->name, "(lookup)"); return -ENOENT; } qemu_file_skip(f, 1); /* subsection */ @@ -224,31 +393,53 @@ static int vmstate_subsection_load(QEMUFile 
*f, const VMStateDescription *vmsd, ret = vmstate_load_state(f, sub_vmsd, opaque, version_id); if (ret) { + trace_vmstate_subsection_load_bad(vmsd->name, "(child)"); return ret; } } + + trace_vmstate_subsection_load_good(vmsd->name); return 0; } static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, - void *opaque) + void *opaque, QJSON *vmdesc) { const VMStateSubsection *sub = vmsd->subsections; + bool subsection_found = false; while (sub && sub->needed) { if (sub->needed(opaque)) { const VMStateDescription *vmsd = sub->vmsd; uint8_t len; + if (vmdesc) { + /* Only create subsection array when we have any */ + if (!subsection_found) { + json_start_array(vmdesc, "subsections"); + subsection_found = true; + } + + json_start_object(vmdesc, NULL); + } + qemu_put_byte(f, QEMU_VM_SUBSECTION); len = strlen(vmsd->name); qemu_put_byte(f, len); qemu_put_buffer(f, (uint8_t *)vmsd->name, len); qemu_put_be32(f, vmsd->version_id); - vmstate_save_state(f, vmsd, opaque); + vmstate_save_state(f, vmsd, opaque, vmdesc); + + if (vmdesc) { + json_end_object(vmdesc); + } } sub++; } + + if (vmdesc && subsection_found) { + json_end_array(vmdesc); + } } /* bool */ @@ -1095,11 +1095,12 @@ static int client_migrate_info(Monitor *mon, const QDict *qdict, const char *subject = qdict_get_try_str(qdict, "cert-subject"); int port = qdict_get_try_int(qdict, "port", -1); int tls_port = qdict_get_try_int(qdict, "tls-port", -1); + Error *err; int ret; if (strcmp(protocol, "spice") == 0) { - if (!using_spice) { - qerror_report(QERR_DEVICE_NOT_ACTIVE, "spice"); + if (!qemu_using_spice(&err)) { + qerror_report_err(err); return -1; } @@ -4782,9 +4783,9 @@ static int monitor_can_read(void *opaque) return (mon->suspend_cnt == 0) ? 
1 : 0; } -static int invalid_qmp_mode(const Monitor *mon, const char *cmd_name) +static int invalid_qmp_mode(const Monitor *mon, const mon_cmd_t *cmd) { - int is_cap = compare_cmd(cmd_name, "qmp_capabilities"); + int is_cap = cmd->mhandler.cmd_new == do_qmp_capabilities; return (qmp_cmd_mode(mon) ? is_cap : !is_cap); } @@ -5078,14 +5079,10 @@ static void handle_qmp_command(JSONMessageParser *parser, QList *tokens) cmd_name = qdict_get_str(input, "execute"); trace_handle_qmp_command(mon, cmd_name); - if (invalid_qmp_mode(mon, cmd_name)) { - qerror_report(QERR_COMMAND_NOT_FOUND, cmd_name); - goto err_out; - } - cmd = qmp_find_cmd(cmd_name); - if (!cmd) { - qerror_report(QERR_COMMAND_NOT_FOUND, cmd_name); + if (!cmd || invalid_qmp_mode(mon, cmd)) { + qerror_report(ERROR_CLASS_COMMAND_NOT_FOUND, + "The command %s has not been found", cmd_name); goto err_out; } diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex 44873ad181..dbe5a38262 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 115d8bbac6..b678d5ebb8 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -33,7 +33,7 @@ typedef struct ResetInfo { uint32_t ipl_continue; } ResetInfo; -ResetInfo save; +static ResetInfo save; static void jump_to_IPL_2(void) { @@ -80,7 +80,7 @@ static void jump_to_IPL_code(uint64_t address) */ static unsigned char _bprs[8*1024]; /* guessed "max" ECKD sector size */ -const int max_bprs_entries = sizeof(_bprs) / sizeof(ExtEckdBlockPtr); +static const int max_bprs_entries = sizeof(_bprs) / sizeof(ExtEckdBlockPtr); static inline void verify_boot_info(BootInfo *bip) { diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h index 6a4823d544..ab132e3579 100644 --- a/pc-bios/s390-ccw/bootmap.h +++ b/pc-bios/s390-ccw/bootmap.h @@ -15,7 +15,7 @@ #include "virtio.h" typedef uint64_t block_number_t; -#define NULL_BLOCK_NR 0xffffffffffffffff +#define 
NULL_BLOCK_NR 0xffffffffffffffffULL #define FREE_SPACE_FILLER '\xAA' diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index f9ec2157ad..6f707bbcd4 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -13,7 +13,7 @@ char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); uint64_t boot_value; -struct subchannel_id blk_schid = { .one = 1 }; +static struct subchannel_id blk_schid = { .one = 1 }; /* * Priniciples of Operations (SA22-7832-09) chapter 17 requires that diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h index 2b773deafa..ceb7418a50 100644 --- a/pc-bios/s390-ccw/s390-ccw.h +++ b/pc-bios/s390-ccw/s390-ccw.h @@ -51,6 +51,8 @@ void disabled_wait(void); /* main.c */ void virtio_panic(const char *string); void write_subsystem_identification(void); +extern char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); +extern uint64_t boot_value; /* sclp-ascii.c */ void sclp_print(const char *string); diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index c0540d1cd4..4dc91a7c43 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -11,7 +11,7 @@ #include "s390-ccw.h" #include "virtio.h" -struct vring block; +static struct vring block; static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index 168b083c87..222742013f 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -76,7 +76,8 @@ static QObject *do_qmp_dispatch(QObject *request, Error **errp) command = qdict_get_str(dict, "execute"); cmd = qmp_find_command(command); if (cmd == NULL) { - error_set(errp, QERR_COMMAND_NOT_FOUND, command); + error_set(errp, ERROR_CLASS_COMMAND_NOT_FOUND, + "The command %s has not been found", command); return NULL; } if (!cmd->enabled) { diff --git a/qjson.c b/qjson.c new file mode 100644 index 0000000000..b242222a58 --- /dev/null +++ b/qjson.c @@ -0,0 +1,129 @@ +/* + * QEMU JSON writer + * + * 
Copyright Alexander Graf + * + * Authors: + * Alexander Graf <agraf@suse.de + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include <qapi/qmp/qstring.h> +#include <stdbool.h> +#include <glib.h> +#include <qjson.h> +#include <qemu/module.h> +#include <qom/object.h> + +struct QJSON { + Object obj; + QString *str; + bool omit_comma; +}; + +static void json_emit_element(QJSON *json, const char *name) +{ + /* Check whether we need to print a , before an element */ + if (json->omit_comma) { + json->omit_comma = false; + } else { + qstring_append(json->str, ", "); + } + + if (name) { + qstring_append(json->str, "\""); + qstring_append(json->str, name); + qstring_append(json->str, "\" : "); + } +} + +void json_start_object(QJSON *json, const char *name) +{ + json_emit_element(json, name); + qstring_append(json->str, "{ "); + json->omit_comma = true; +} + +void json_end_object(QJSON *json) +{ + qstring_append(json->str, " }"); + json->omit_comma = false; +} + +void json_start_array(QJSON *json, const char *name) +{ + json_emit_element(json, name); + qstring_append(json->str, "[ "); + json->omit_comma = true; +} + +void json_end_array(QJSON *json) +{ + qstring_append(json->str, " ]"); + json->omit_comma = false; +} + +void json_prop_int(QJSON *json, const char *name, int64_t val) +{ + json_emit_element(json, name); + qstring_append_int(json->str, val); +} + +void json_prop_str(QJSON *json, const char *name, const char *str) +{ + json_emit_element(json, name); + qstring_append_chr(json->str, '"'); + qstring_append(json->str, str); + qstring_append_chr(json->str, '"'); +} + +const char *qjson_get_str(QJSON *json) +{ + return qstring_get_str(json->str); +} + +QJSON *qjson_new(void) +{ + QJSON *json = (QJSON *)object_new(TYPE_QJSON); + return json; +} + +void qjson_finish(QJSON *json) +{ + json_end_object(json); +} + +static void qjson_initfn(Object *obj) +{ + QJSON *json = 
(QJSON *)object_dynamic_cast(obj, TYPE_QJSON); + assert(json); + + json->str = qstring_from_str("{ "); + json->omit_comma = true; +} + +static void qjson_finalizefn(Object *obj) +{ + QJSON *json = (QJSON *)object_dynamic_cast(obj, TYPE_QJSON); + + assert(json); + qobject_decref(QOBJECT(json->str)); +} + +static const TypeInfo qjson_type_info = { + .name = TYPE_QJSON, + .parent = TYPE_OBJECT, + .instance_size = sizeof(QJSON), + .instance_init = qjson_initfn, + .instance_finalize = qjson_finalizefn, +}; + +static void qjson_register_types(void) +{ + type_register_static(&qjson_type_info); +} + +type_init(qjson_register_types) @@ -137,14 +137,18 @@ VncInfo *qmp_query_vnc(Error **errp) #endif #ifndef CONFIG_SPICE -/* If SPICE support is enabled, the "true" query-spice command is - defined in the SPICE subsystem. Also note that we use a small - trick to maintain query-spice's original behavior, which is not - to be available in the namespace if SPICE is not compiled in */ +/* + * qmp-commands.hx ensures that QMP command query-spice exists only + * #ifdef CONFIG_SPICE. Necessary for an accurate query-commands + * result. However, the QAPI schema is blissfully unaware of that, + * and the QAPI code generator happily generates a dead + * qmp_marshal_input_query_spice() that calls qmp_query_spice(). + * Provide it one, or else linking fails. + * FIXME Educate the QAPI schema on CONFIG_SPICE. + */ SpiceInfo *qmp_query_spice(Error **errp) { - error_set(errp, QERR_COMMAND_NOT_FOUND, "query-spice"); - return NULL; + abort(); }; #endif @@ -287,9 +291,7 @@ void qmp_set_password(const char *protocol, const char *password, } if (strcmp(protocol, "spice") == 0) { - if (!using_spice) { - /* correct one? 
spice isn't a device ,,, */ - error_set(errp, QERR_DEVICE_NOT_ACTIVE, "spice"); + if (!qemu_using_spice(errp)) { return; } rc = qemu_spice_set_passwd(password, fail_if_connected, @@ -335,9 +337,7 @@ void qmp_expire_password(const char *protocol, const char *whenstr, } if (strcmp(protocol, "spice") == 0) { - if (!using_spice) { - /* correct one? spice isn't a device ,,, */ - error_set(errp, QERR_DEVICE_NOT_ACTIVE, "spice"); + if (!qemu_using_spice(errp)) { return; } rc = qemu_spice_set_pw_expire(when); @@ -575,8 +575,7 @@ void qmp_add_client(const char *protocol, const char *fdname, } if (strcmp(protocol, "spice") == 0) { - if (!using_spice) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, "spice"); + if (!qemu_using_spice(errp)) { close(fd); return; } @@ -572,14 +572,34 @@ static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id) return vmstate_load_state(f, se->vmsd, se->opaque, version_id); } -static void vmstate_save(QEMUFile *f, SaveStateEntry *se) +static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc) +{ + int64_t old_offset, size; + + old_offset = qemu_ftell_fast(f); + se->ops->save_state(f, se->opaque); + size = qemu_ftell_fast(f) - old_offset; + + if (vmdesc) { + json_prop_int(vmdesc, "size", size); + json_start_array(vmdesc, "fields"); + json_start_object(vmdesc, NULL); + json_prop_str(vmdesc, "name", "data"); + json_prop_int(vmdesc, "size", size); + json_prop_str(vmdesc, "type", "buffer"); + json_end_object(vmdesc); + json_end_array(vmdesc); + } +} + +static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc) { trace_vmstate_save(se->idstr, se->vmsd ? 
se->vmsd->name : "(old)"); - if (!se->vmsd) { /* Old style */ - se->ops->save_state(f, se->opaque); + if (!se->vmsd) { + vmstate_save_old_style(f, se, vmdesc); return; } - vmstate_save_state(f, se->vmsd, se->opaque); + vmstate_save_state(f, se->vmsd, se->opaque, vmdesc); } bool qemu_savevm_state_blocked(Error **errp) @@ -674,7 +694,7 @@ int qemu_savevm_state_iterate(QEMUFile *f) qemu_put_be32(f, se->section_id); ret = se->ops->save_live_iterate(f, se->opaque); - trace_savevm_section_end(se->idstr, se->section_id); + trace_savevm_section_end(se->idstr, se->section_id, ret); if (ret < 0) { qemu_file_set_error(f, ret); @@ -692,6 +712,8 @@ int qemu_savevm_state_iterate(QEMUFile *f) void qemu_savevm_state_complete(QEMUFile *f) { + QJSON *vmdesc; + int vmdesc_len; SaveStateEntry *se; int ret; @@ -714,13 +736,16 @@ void qemu_savevm_state_complete(QEMUFile *f) qemu_put_be32(f, se->section_id); ret = se->ops->save_live_complete(f, se->opaque); - trace_savevm_section_end(se->idstr, se->section_id); + trace_savevm_section_end(se->idstr, se->section_id, ret); if (ret < 0) { qemu_file_set_error(f, ret); return; } } + vmdesc = qjson_new(); + json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE); + json_start_array(vmdesc, "devices"); QTAILQ_FOREACH(se, &savevm_handlers, entry) { int len; @@ -728,6 +753,11 @@ void qemu_savevm_state_complete(QEMUFile *f) continue; } trace_savevm_section_start(se->idstr, se->section_id); + + json_start_object(vmdesc, NULL); + json_prop_str(vmdesc, "name", se->idstr); + json_prop_int(vmdesc, "instance_id", se->instance_id); + /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_FULL); qemu_put_be32(f, se->section_id); @@ -740,11 +770,23 @@ void qemu_savevm_state_complete(QEMUFile *f) qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); - vmstate_save(f, se); - trace_savevm_section_end(se->idstr, se->section_id); + vmstate_save(f, se, vmdesc); + + json_end_object(vmdesc); + trace_savevm_section_end(se->idstr, se->section_id, 0); } 
qemu_put_byte(f, QEMU_VM_EOF); + + json_end_array(vmdesc); + qjson_finish(vmdesc); + vmdesc_len = strlen(qjson_get_str(vmdesc)); + + qemu_put_byte(f, QEMU_VM_VMDESCRIPTION); + qemu_put_be32(f, vmdesc_len); + qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len); + object_unref(OBJECT(vmdesc)); + qemu_fflush(f); } @@ -843,7 +885,7 @@ static int qemu_save_device_state(QEMUFile *f) qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); - vmstate_save(f, se); + vmstate_save(f, se, NULL); } qemu_put_byte(f, QEMU_VM_EOF); @@ -883,25 +925,30 @@ int qemu_loadvm_state(QEMUFile *f) QLIST_HEAD(, LoadStateEntry) loadvm_handlers = QLIST_HEAD_INITIALIZER(loadvm_handlers); LoadStateEntry *le, *new_le; + Error *local_err = NULL; uint8_t section_type; unsigned int v; int ret; - if (qemu_savevm_state_blocked(NULL)) { + if (qemu_savevm_state_blocked(&local_err)) { + error_report("%s", error_get_pretty(local_err)); + error_free(local_err); return -EINVAL; } v = qemu_get_be32(f); if (v != QEMU_VM_FILE_MAGIC) { + error_report("Not a migration stream"); return -EINVAL; } v = qemu_get_be32(f); if (v == QEMU_VM_FILE_VERSION_COMPAT) { - fprintf(stderr, "SaveVM v2 format is obsolete and don't work anymore\n"); + error_report("SaveVM v2 format is obsolete and don't work anymore"); return -ENOTSUP; } if (v != QEMU_VM_FILE_VERSION) { + error_report("Unsupported migration stream version"); return -ENOTSUP; } @@ -911,6 +958,7 @@ int qemu_loadvm_state(QEMUFile *f) char idstr[257]; int len; + trace_qemu_loadvm_state_section(section_type); switch (section_type) { case QEMU_VM_SECTION_START: case QEMU_VM_SECTION_FULL: @@ -922,18 +970,21 @@ int qemu_loadvm_state(QEMUFile *f) instance_id = qemu_get_be32(f); version_id = qemu_get_be32(f); + trace_qemu_loadvm_state_section_startfull(section_id, idstr, + instance_id, version_id); /* Find savevm section */ se = find_se(idstr, instance_id); if (se == NULL) { - fprintf(stderr, "Unknown savevm section or instance '%s' %d\n", idstr, 
instance_id); + error_report("Unknown savevm section or instance '%s' %d", + idstr, instance_id); ret = -EINVAL; goto out; } /* Validate version */ if (version_id > se->version_id) { - fprintf(stderr, "savevm: unsupported version %d for '%s' v%d\n", - version_id, idstr, se->version_id); + error_report("savevm: unsupported version %d for '%s' v%d", + version_id, idstr, se->version_id); ret = -EINVAL; goto out; } @@ -948,8 +999,8 @@ int qemu_loadvm_state(QEMUFile *f) ret = vmstate_load(f, le->se, le->version_id); if (ret < 0) { - fprintf(stderr, "qemu: warning: error while loading state for instance 0x%x of device '%s'\n", - instance_id, idstr); + error_report("error while loading state for instance 0x%x of" + " device '%s'", instance_id, idstr); goto out; } break; @@ -957,26 +1008,27 @@ int qemu_loadvm_state(QEMUFile *f) case QEMU_VM_SECTION_END: section_id = qemu_get_be32(f); + trace_qemu_loadvm_state_section_partend(section_id); QLIST_FOREACH(le, &loadvm_handlers, entry) { if (le->section_id == section_id) { break; } } if (le == NULL) { - fprintf(stderr, "Unknown savevm section %d\n", section_id); + error_report("Unknown savevm section %d", section_id); ret = -EINVAL; goto out; } ret = vmstate_load(f, le->se, le->version_id); if (ret < 0) { - fprintf(stderr, "qemu: warning: error while loading state section id %d\n", - section_id); + error_report("error while loading state section id %d(%s)", + section_id, le->se->idstr); goto out; } break; default: - fprintf(stderr, "Unknown savevm section type %d\n", section_type); + error_report("Unknown savevm section type %d", section_type); ret = -EINVAL; goto out; } diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py new file mode 100755 index 0000000000..b8b9968e00 --- /dev/null +++ b/scripts/analyze-migration.py @@ -0,0 +1,592 @@ +#!/usr/bin/env python +# +# Migration Stream Analyzer +# +# Copyright (c) 2015 Alexander Graf <agraf@suse.de> +# +# This library is free software; you can redistribute it 
and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +import numpy as np +import json +import os +import argparse +import collections +import pprint + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError: + pass + +class MigrationFile(object): + def __init__(self, filename): + self.filename = filename + self.file = open(self.filename, "rb") + + def read64(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i8')[0]) + + def read32(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i4')[0]) + + def read16(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i2')[0]) + + def read8(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i1')[0]) + + def readstr(self, len = None): + if len is None: + len = self.read8() + if len == 0: + return "" + return np.fromfile(self.file, count=1, dtype=('S%d' % len))[0] + + def readvar(self, size = None): + if size is None: + size = self.read8() + if size == 0: + return "" + value = self.file.read(size) + if len(value) != size: + raise Exception("Unexpected end of %s at 0x%x" % (self.filename, self.file.tell())) + return value + + def tell(self): + return self.file.tell() + + # The VMSD description is at the end of the file, after EOF. Look for + # the last NULL byte, then for the beginning brace of JSON. 
+ def read_migration_debug_json(self): + QEMU_VM_VMDESCRIPTION = 0x06 + + # Remember the offset in the file when we started + entrypos = self.file.tell() + + # Read the last 10MB + self.file.seek(0, os.SEEK_END) + endpos = self.file.tell() + self.file.seek(max(-endpos, -10 * 1024 * 1024), os.SEEK_END) + datapos = self.file.tell() + data = self.file.read() + # The full file read closed the file as well, reopen it + self.file = open(self.filename, "rb") + + # Find the last NULL byte, then the first brace after that. This should + # be the beginning of our JSON data. + nulpos = data.rfind("\0") + jsonpos = data.find("{", nulpos) + + # Check backwards from there and see whether we guessed right + self.file.seek(datapos + jsonpos - 5, 0) + if self.read8() != QEMU_VM_VMDESCRIPTION: + raise Exception("No Debug Migration device found") + + jsonlen = self.read32() + + # Seek back to where we were at the beginning + self.file.seek(entrypos, 0) + + return data[jsonpos:jsonpos + jsonlen] + + def close(self): + self.file.close() + +class RamSection(object): + RAM_SAVE_FLAG_COMPRESS = 0x02 + RAM_SAVE_FLAG_MEM_SIZE = 0x04 + RAM_SAVE_FLAG_PAGE = 0x08 + RAM_SAVE_FLAG_EOS = 0x10 + RAM_SAVE_FLAG_CONTINUE = 0x20 + RAM_SAVE_FLAG_XBZRLE = 0x40 + RAM_SAVE_FLAG_HOOK = 0x80 + + def __init__(self, file, version_id, ramargs, section_key): + if version_id != 4: + raise Exception("Unknown RAM version %d" % version_id) + + self.file = file + self.section_key = section_key + self.TARGET_PAGE_SIZE = ramargs['page_size'] + self.dump_memory = ramargs['dump_memory'] + self.write_memory = ramargs['write_memory'] + self.sizeinfo = collections.OrderedDict() + self.data = collections.OrderedDict() + self.data['section sizes'] = self.sizeinfo + self.name = '' + if self.write_memory: + self.files = { } + if self.dump_memory: + self.memory = collections.OrderedDict() + self.data['memory'] = self.memory + + def __repr__(self): + return self.data.__repr__() + + def __str__(self): + return self.data.__str__() 
+ + def getDict(self): + return self.data + + def read(self): + # Read all RAM sections + while True: + addr = self.file.read64() + flags = addr & (self.TARGET_PAGE_SIZE - 1) + addr &= ~(self.TARGET_PAGE_SIZE - 1) + + if flags & self.RAM_SAVE_FLAG_MEM_SIZE: + while True: + namelen = self.file.read8() + # We assume that no RAM chunk is big enough to ever + # hit the first byte of the address, so when we see + # a zero here we know it has to be an address, not the + # length of the next block. + if namelen == 0: + self.file.file.seek(-1, 1) + break + self.name = self.file.readstr(len = namelen) + len = self.file.read64() + self.sizeinfo[self.name] = '0x%016x' % len + if self.write_memory: + print self.name + mkdir_p('./' + os.path.dirname(self.name)) + f = open('./' + self.name, "wb") + f.truncate(0) + f.truncate(len) + self.files[self.name] = f + flags &= ~self.RAM_SAVE_FLAG_MEM_SIZE + + if flags & self.RAM_SAVE_FLAG_COMPRESS: + if flags & self.RAM_SAVE_FLAG_CONTINUE: + flags &= ~self.RAM_SAVE_FLAG_CONTINUE + else: + self.name = self.file.readstr() + fill_char = self.file.read8() + # The page in question is filled with fill_char now + if self.write_memory and fill_char != 0: + self.files[self.name].seek(addr, os.SEEK_SET) + self.files[self.name].write(chr(fill_char) * self.TARGET_PAGE_SIZE) + if self.dump_memory: + self.memory['%s (0x%016x)' % (self.name, addr)] = 'Filled with 0x%02x' % fill_char + flags &= ~self.RAM_SAVE_FLAG_COMPRESS + elif flags & self.RAM_SAVE_FLAG_PAGE: + if flags & self.RAM_SAVE_FLAG_CONTINUE: + flags &= ~self.RAM_SAVE_FLAG_CONTINUE + else: + self.name = self.file.readstr() + + if self.write_memory or self.dump_memory: + data = self.file.readvar(size = self.TARGET_PAGE_SIZE) + else: # Just skip RAM data + self.file.file.seek(self.TARGET_PAGE_SIZE, 1) + + if self.write_memory: + self.files[self.name].seek(addr, os.SEEK_SET) + self.files[self.name].write(data) + if self.dump_memory: + hexdata = " ".join("{0:02x}".format(ord(c)) for c in data) + 
self.memory['%s (0x%016x)' % (self.name, addr)] = hexdata + + flags &= ~self.RAM_SAVE_FLAG_PAGE + elif flags & self.RAM_SAVE_FLAG_XBZRLE: + raise Exception("XBZRLE RAM compression is not supported yet") + elif flags & self.RAM_SAVE_FLAG_HOOK: + raise Exception("RAM hooks don't make sense with files") + + # End of RAM section + if flags & self.RAM_SAVE_FLAG_EOS: + break + + if flags != 0: + raise Exception("Unknown RAM flags: %x" % flags) + + def __del__(self): + if self.write_memory: + for key in self.files: + self.files[key].close() + + +class HTABSection(object): + HASH_PTE_SIZE_64 = 16 + + def __init__(self, file, version_id, device, section_key): + if version_id != 1: + raise Exception("Unknown HTAB version %d" % version_id) + + self.file = file + self.section_key = section_key + + def read(self): + + header = self.file.read32() + + if (header > 0): + # First section, just the hash shift + return + + # Read until end marker + while True: + index = self.file.read32() + n_valid = self.file.read16() + n_invalid = self.file.read16() + + if index == 0 and n_valid == 0 and n_invalid == 0: + break + + self.file.readvar(n_valid * HASH_PTE_SIZE_64) + + def getDict(self): + return "" + +class VMSDFieldGeneric(object): + def __init__(self, desc, file): + self.file = file + self.desc = desc + self.data = "" + + def __repr__(self): + return str(self.__str__()) + + def __str__(self): + return " ".join("{0:02x}".format(ord(c)) for c in self.data) + + def getDict(self): + return self.__str__() + + def read(self): + size = int(self.desc['size']) + self.data = self.file.readvar(size) + return self.data + +class VMSDFieldInt(VMSDFieldGeneric): + def __init__(self, desc, file): + super(VMSDFieldInt, self).__init__(desc, file) + self.size = int(desc['size']) + self.format = '0x%%0%dx' % (self.size * 2) + self.sdtype = '>i%d' % self.size + self.udtype = '>u%d' % self.size + + def __repr__(self): + if self.data < 0: + return ('%s (%d)' % ((self.format % self.udata), self.data)) + 
else: + return self.format % self.data + + def __str__(self): + return self.__repr__() + + def getDict(self): + return self.__str__() + + def read(self): + super(VMSDFieldInt, self).read() + self.sdata = np.fromstring(self.data, count=1, dtype=(self.sdtype))[0] + self.udata = np.fromstring(self.data, count=1, dtype=(self.udtype))[0] + self.data = self.sdata + return self.data + +class VMSDFieldUInt(VMSDFieldInt): + def __init__(self, desc, file): + super(VMSDFieldUInt, self).__init__(desc, file) + + def read(self): + super(VMSDFieldUInt, self).read() + self.data = self.udata + return self.data + +class VMSDFieldIntLE(VMSDFieldInt): + def __init__(self, desc, file): + super(VMSDFieldIntLE, self).__init__(desc, file) + self.dtype = '<i%d' % self.size + +class VMSDFieldBool(VMSDFieldGeneric): + def __init__(self, desc, file): + super(VMSDFieldBool, self).__init__(desc, file) + + def __repr__(self): + return self.data.__repr__() + + def __str__(self): + return self.data.__str__() + + def getDict(self): + return self.data + + def read(self): + super(VMSDFieldBool, self).read() + if self.data[0] == 0: + self.data = False + else: + self.data = True + return self.data + +class VMSDFieldStruct(VMSDFieldGeneric): + QEMU_VM_SUBSECTION = 0x05 + + def __init__(self, desc, file): + super(VMSDFieldStruct, self).__init__(desc, file) + self.data = collections.OrderedDict() + + # When we see compressed array elements, unfold them here + new_fields = [] + for field in self.desc['struct']['fields']: + if not 'array_len' in field: + new_fields.append(field) + continue + array_len = field.pop('array_len') + field['index'] = 0 + new_fields.append(field) + for i in xrange(1, array_len): + c = field.copy() + c['index'] = i + new_fields.append(c) + + self.desc['struct']['fields'] = new_fields + + def __repr__(self): + return self.data.__repr__() + + def __str__(self): + return self.data.__str__() + + def read(self): + for field in self.desc['struct']['fields']: + try: + reader = 
vmsd_field_readers[field['type']] + except: + reader = VMSDFieldGeneric + + field['data'] = reader(field, self.file) + field['data'].read() + + if 'index' in field: + if field['name'] not in self.data: + self.data[field['name']] = [] + a = self.data[field['name']] + if len(a) != int(field['index']): + raise Exception("internal index of data field unmatched (%d/%d)" % (len(a), int(field['index']))) + a.append(field['data']) + else: + self.data[field['name']] = field['data'] + + if 'subsections' in self.desc['struct']: + for subsection in self.desc['struct']['subsections']: + if self.file.read8() != self.QEMU_VM_SUBSECTION: + raise Exception("Subsection %s not found at offset %x" % ( subsection['vmsd_name'], self.file.tell())) + name = self.file.readstr() + version_id = self.file.read32() + self.data[name] = VMSDSection(self.file, version_id, subsection, (name, 0)) + self.data[name].read() + + def getDictItem(self, value): + # Strings would fall into the array category, treat + # them specially + if value.__class__ is ''.__class__: + return value + + try: + return self.getDictOrderedDict(value) + except: + try: + return self.getDictArray(value) + except: + try: + return value.getDict() + except: + return value + + def getDictArray(self, array): + r = [] + for value in array: + r.append(self.getDictItem(value)) + return r + + def getDictOrderedDict(self, dict): + r = collections.OrderedDict() + for (key, value) in dict.items(): + r[key] = self.getDictItem(value) + return r + + def getDict(self): + return self.getDictOrderedDict(self.data) + +vmsd_field_readers = { + "bool" : VMSDFieldBool, + "int8" : VMSDFieldInt, + "int16" : VMSDFieldInt, + "int32" : VMSDFieldInt, + "int32 equal" : VMSDFieldInt, + "int32 le" : VMSDFieldIntLE, + "int64" : VMSDFieldInt, + "uint8" : VMSDFieldUInt, + "uint16" : VMSDFieldUInt, + "uint32" : VMSDFieldUInt, + "uint32 equal" : VMSDFieldUInt, + "uint64" : VMSDFieldUInt, + "int64 equal" : VMSDFieldInt, + "uint8 equal" : VMSDFieldInt, + "uint16 
equal" : VMSDFieldInt, + "float64" : VMSDFieldGeneric, + "timer" : VMSDFieldGeneric, + "buffer" : VMSDFieldGeneric, + "unused_buffer" : VMSDFieldGeneric, + "bitmap" : VMSDFieldGeneric, + "struct" : VMSDFieldStruct, + "unknown" : VMSDFieldGeneric, +} + +class VMSDSection(VMSDFieldStruct): + def __init__(self, file, version_id, device, section_key): + self.file = file + self.data = "" + self.vmsd_name = "" + self.section_key = section_key + desc = device + if 'vmsd_name' in device: + self.vmsd_name = device['vmsd_name'] + + # A section really is nothing but a FieldStruct :) + super(VMSDSection, self).__init__({ 'struct' : desc }, file) + +############################################################################### + +class MigrationDump(object): + QEMU_VM_FILE_MAGIC = 0x5145564d + QEMU_VM_FILE_VERSION = 0x00000003 + QEMU_VM_EOF = 0x00 + QEMU_VM_SECTION_START = 0x01 + QEMU_VM_SECTION_PART = 0x02 + QEMU_VM_SECTION_END = 0x03 + QEMU_VM_SECTION_FULL = 0x04 + QEMU_VM_SUBSECTION = 0x05 + QEMU_VM_VMDESCRIPTION = 0x06 + + def __init__(self, filename): + self.section_classes = { ( 'ram', 0 ) : [ RamSection, None ], + ( 'spapr/htab', 0) : ( HTABSection, None ) } + self.filename = filename + self.vmsd_desc = None + + def read(self, desc_only = False, dump_memory = False, write_memory = False): + # Read in the whole file + file = MigrationFile(self.filename) + + # File magic + data = file.read32() + if data != self.QEMU_VM_FILE_MAGIC: + raise Exception("Invalid file magic %x" % data) + + # Version (has to be v3) + data = file.read32() + if data != self.QEMU_VM_FILE_VERSION: + raise Exception("Invalid version number %d" % data) + + self.load_vmsd_json(file) + + # Read sections + self.sections = collections.OrderedDict() + + if desc_only: + return + + ramargs = {} + ramargs['page_size'] = self.vmsd_desc['page_size'] + ramargs['dump_memory'] = dump_memory + ramargs['write_memory'] = write_memory + self.section_classes[('ram',0)][1] = ramargs + + while True: + section_type = 
file.read8() + if section_type == self.QEMU_VM_EOF: + break + elif section_type == self.QEMU_VM_SECTION_START or section_type == self.QEMU_VM_SECTION_FULL: + section_id = file.read32() + name = file.readstr() + instance_id = file.read32() + version_id = file.read32() + section_key = (name, instance_id) + classdesc = self.section_classes[section_key] + section = classdesc[0](file, version_id, classdesc[1], section_key) + self.sections[section_id] = section + section.read() + elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END: + section_id = file.read32() + self.sections[section_id].read() + else: + raise Exception("Unknown section type: %d" % section_type) + file.close() + + def load_vmsd_json(self, file): + vmsd_json = file.read_migration_debug_json() + self.vmsd_desc = json.loads(vmsd_json, object_pairs_hook=collections.OrderedDict) + for device in self.vmsd_desc['devices']: + key = (device['name'], device['instance_id']) + value = ( VMSDSection, device ) + self.section_classes[key] = value + + def getDict(self): + r = collections.OrderedDict() + for (key, value) in self.sections.items(): + key = "%s (%d)" % ( value.section_key[0], key ) + r[key] = value.getDict() + return r + +############################################################################### + +class JSONEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, VMSDFieldGeneric): + return str(o) + return json.JSONEncoder.default(self, o) + +parser = argparse.ArgumentParser() +parser.add_argument("-f", "--file", help='migration dump to read from', required=True) +parser.add_argument("-m", "--memory", help='dump RAM contents as well', action='store_true') +parser.add_argument("-d", "--dump", help='what to dump ("state" or "desc")', default='state') +parser.add_argument("-x", "--extract", help='extract contents into individual files', action='store_true') +args = parser.parse_args() + +jsonenc = JSONEncoder(indent=4, separators=(',', ': ')) + +if 
args.extract: + dump = MigrationDump(args.file) + + dump.read(desc_only = True) + print "desc.json" + f = open("desc.json", "wb") + f.truncate() + f.write(jsonenc.encode(dump.vmsd_desc)) + f.close() + + dump.read(write_memory = True) + dict = dump.getDict() + print "state.json" + f = open("state.json", "wb") + f.truncate() + f.write(jsonenc.encode(dict)) + f.close() +elif args.dump == "state": + dump = MigrationDump(args.file) + dump.read(dump_memory = args.memory) + dict = dump.getDict() + print jsonenc.encode(dict) +elif args.dump == "desc": + dump = MigrationDump(args.file) + dump.read(desc_only = True) + print jsonenc.encode(dump.vmsd_desc) +else: + raise Exception("Please specify either -x, -d state or -d dump") diff --git a/scripts/coverity-model.c b/scripts/coverity-model.c index 4c99a85cfc..58356afa66 100644 --- a/scripts/coverity-model.c +++ b/scripts/coverity-model.c @@ -40,6 +40,8 @@ typedef unsigned long long uint64_t; typedef long long int64_t; typedef _Bool bool; +typedef struct va_list_str *va_list; + /* exec.c */ typedef struct AddressSpace AddressSpace; @@ -90,7 +92,8 @@ static int get_keysym(const name2keysym_t *table, } } -/* glib memory allocation functions. +/* + * GLib memory allocation functions. * * Note that we ignore the fact that g_malloc of 0 bytes returns NULL, * and g_realloc of 0 bytes frees the pointer. @@ -107,60 +110,215 @@ static int get_keysym(const name2keysym_t *table, * we'll get a buffer overflow reported anyway. 
*/ -void *malloc(size_t); -void *calloc(size_t, size_t); -void *realloc(void *, size_t); -void free(void *); +/* + * Allocation primitives, cannot return NULL + * See also Coverity's library/generic/libc/all/all.c + */ + +void *g_malloc_n(size_t nmemb, size_t size) +{ + size_t sz; + void *ptr; + + __coverity_negative_sink__(nmemb); + __coverity_negative_sink__(size); + sz = nmemb * size; + ptr = __coverity_alloc__(sz); + __coverity_mark_as_uninitialized_buffer__(ptr); + __coverity_mark_as_afm_allocated__(ptr, "g_free"); + return ptr; +} + +void *g_malloc0_n(size_t nmemb, size_t size) +{ + size_t sz; + void *ptr; + + __coverity_negative_sink__(nmemb); + __coverity_negative_sink__(size); + sz = nmemb * size; + ptr = __coverity_alloc__(sz); + __coverity_writeall0__(ptr); + __coverity_mark_as_afm_allocated__(ptr, "g_free"); + return ptr; +} -void * -g_malloc(size_t n_bytes) +void *g_realloc_n(void *ptr, size_t nmemb, size_t size) { - void *mem; - __coverity_negative_sink__(n_bytes); - mem = malloc(n_bytes == 0 ? 1 : n_bytes); - if (!mem) __coverity_panic__(); - return mem; + size_t sz; + + __coverity_negative_sink__(nmemb); + __coverity_negative_sink__(size); + sz = nmemb * size; + __coverity_escape__(ptr); + ptr = __coverity_alloc__(sz); + /* + * Memory beyond the old size isn't actually initialized. Can't + * model that.
See Coverity's realloc() model + */ + __coverity_writeall__(ptr); + __coverity_mark_as_afm_allocated__(ptr, "g_free"); + return ptr; } -void * -g_malloc0(size_t n_bytes) +void g_free(void *ptr) +{ + __coverity_free__(ptr); + __coverity_mark_as_afm_freed__(ptr, "g_free"); +} + +/* + * Derive the g_try_FOO_n() from the g_FOO_n() by adding indeterminate + * out of memory conditions + */ + +void *g_try_malloc_n(size_t nmemb, size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_malloc_n(nmemb, size); +} + +void *g_try_malloc0_n(size_t nmemb, size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_malloc0_n(nmemb, size); +} + +void *g_try_realloc_n(void *ptr, size_t nmemb, size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_realloc_n(ptr, nmemb, size); +} + +/* Trivially derive the g_FOO() from the g_FOO_n() */ + +void *g_malloc(size_t size) { - void *mem; - __coverity_negative_sink__(n_bytes); - mem = calloc(1, n_bytes == 0 ? 1 : n_bytes); - if (!mem) __coverity_panic__(); - return mem; + return g_malloc_n(1, size); } -void g_free(void *mem) +void *g_malloc0(size_t size) { - free(mem); + return g_malloc0_n(1, size); } -void *g_realloc(void * mem, size_t n_bytes) +void *g_realloc(void *ptr, size_t size) { - __coverity_negative_sink__(n_bytes); - mem = realloc(mem, n_bytes == 0 ? 1 : n_bytes); - if (!mem) __coverity_panic__(); - return mem; + return g_realloc_n(ptr, 1, size); } -void *g_try_malloc(size_t n_bytes) +void *g_try_malloc(size_t size) { - __coverity_negative_sink__(n_bytes); - return malloc(n_bytes == 0 ? 1 : n_bytes); + return g_try_malloc_n(1, size); } -void *g_try_malloc0(size_t n_bytes) +void *g_try_malloc0(size_t size) { - __coverity_negative_sink__(n_bytes); - return calloc(1, n_bytes == 0 ? 
1 : n_bytes); + return g_try_malloc0_n(1, size); } -void *g_try_realloc(void *mem, size_t n_bytes) +void *g_try_realloc(void *ptr, size_t size) { - __coverity_negative_sink__(n_bytes); - return realloc(mem, n_bytes == 0 ? 1 : n_bytes); + return g_try_realloc_n(ptr, 1, size); +} + +/* + * GLib string allocation functions + */ + +char *g_strdup(const char *s) +{ + char *dup; + size_t i; + + if (!s) { + return NULL; + } + + __coverity_string_null_sink__(s); + __coverity_string_size_sink__(s); + dup = __coverity_alloc_nosize__(); + __coverity_mark_as_afm_allocated__(dup, "g_free"); + for (i = 0; (dup[i] = s[i]); i++) ; + return dup; +} + +char *g_strndup(const char *s, size_t n) +{ + char *dup; + size_t i; + + __coverity_negative_sink__(n); + + if (!s) { + return NULL; + } + + dup = g_malloc(n + 1); + for (i = 0; i < n && (dup[i] = s[i]); i++) ; + dup[i] = 0; + return dup; +} + +char *g_strdup_printf(const char *format, ...) +{ + char ch, *s; + size_t len; + + __coverity_string_null_sink__(format); + __coverity_string_size_sink__(format); + + ch = *format; + + s = __coverity_alloc_nosize__(); + __coverity_writeall__(s); + __coverity_mark_as_afm_allocated__(s, "g_free"); + return s; +} + +char *g_strdup_vprintf(const char *format, va_list ap) +{ + char ch, *s; + size_t len; + + __coverity_string_null_sink__(format); + __coverity_string_size_sink__(format); + + ch = *format; + ch = *(char *)ap; + + s = __coverity_alloc_nosize__(); + __coverity_writeall__(s); + __coverity_mark_as_afm_allocated__(s, "g_free"); + + return s; +} + +char *g_strconcat(const char *s, ...)
+{ + char *ret; + + /* + * Can't model: last argument must be null, the others + * null-terminated strings + */ + + ret = __coverity_alloc_nosize__(); + __coverity_writeall__(ret); + __coverity_mark_as_afm_allocated__(ret, "g_free"); + return ret; } /* Other glib functions */ diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py index f7ce3fc483..b6c0bbead9 100755 --- a/scripts/vmstate-static-checker.py +++ b/scripts/vmstate-static-checker.py @@ -53,6 +53,8 @@ def check_fields_match(name, s_field, d_field): 'parent_obj.parent_obj.parent_obj', 'port.br.dev.exp.aer_log', 'parent_obj.parent_obj.parent_obj.exp.aer_log'], + 'cirrus_vga': ['hw_cursor_x', 'vga.hw_cursor_x', + 'hw_cursor_y', 'vga.hw_cursor_y'], 'lsiscsi': ['dev', 'parent_obj'], 'mch': ['d', 'parent_obj'], 'pci_bridge': ['bridge.dev', 'parent_obj', 'bridge.dev.shpc', 'shpc'], diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 285947f911..d38af747ac 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -113,7 +113,14 @@ static void arm_cpu_reset(CPUState *s) /* and to the FP/Neon instructions */ env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3); #else - env->pstate = PSTATE_MODE_EL1h; + /* Reset into the highest available EL */ + if (arm_feature(env, ARM_FEATURE_EL3)) { + env->pstate = PSTATE_MODE_EL3h; + } else if (arm_feature(env, ARM_FEATURE_EL2)) { + env->pstate = PSTATE_MODE_EL2h; + } else { + env->pstate = PSTATE_MODE_EL1h; + } env->pc = cpu->rvbar; #endif } else { @@ -320,6 +327,29 @@ static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level) kvm_set_irq(kvm_state, kvm_irq, level ? 1 : 0); #endif } + +static bool arm_cpu_is_big_endian(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + int cur_el; + + cpu_synchronize_state(cs); + + /* In 32bit guest endianness is determined by looking at CPSR's E bit */ + if (!is_a64(env)) { + return (env->uncached_cpsr & CPSR_E) ?
1 : 0; + } + + cur_el = arm_current_el(env); + + if (cur_el == 0) { + return (env->cp15.sctlr_el[1] & SCTLR_E0E) != 0; + } + + return (env->cp15.sctlr_el[cur_el] & SCTLR_EE) != 0; +} + #endif static inline void set_feature(CPUARMState *env, int feature) @@ -1189,6 +1219,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) cc->do_interrupt = arm_cpu_do_interrupt; cc->get_phys_page_debug = arm_cpu_get_phys_page_debug; cc->vmsd = &vmstate_arm_cpu; + cc->virtio_is_big_endian = arm_cpu_is_big_endian; #endif cc->gdb_num_core_regs = 26; cc->gdb_core_xml_file = "arm-core.xml"; diff --git a/target-arm/cpu.h b/target-arm/cpu.h index cd7a9e8e14..1830a12d4a 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -32,6 +32,8 @@ # define ELF_MACHINE EM_ARM #endif +#define TARGET_IS_BIENDIAN 1 + #define CPUArchState struct CPUARMState #include "qemu-common.h" @@ -98,7 +100,7 @@ typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info, struct arm_boot_info; -#define NB_MMU_MODES 4 +#define NB_MMU_MODES 7 /* We currently assume float and double are IEEE single and double precision respectively. @@ -1110,8 +1112,14 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) * a register definition to override a previous definition for the * same (cp, is64, crn, crm, opc1, opc2) tuple: either the new or the * old must have the OVERRIDE bit set. - * NO_MIGRATE indicates that this register should be ignored for migration; - * (eg because any state is accessed via some other coprocessor register). + * ALIAS indicates that this register is an alias view of some underlying + * state which is also visible via another register, and that the other + * register is handling migration; registers marked ALIAS will not be migrated + * but may have their state set by syncing of register state from KVM. 
+ * NO_RAW indicates that this register has no underlying state and does not + * support raw access for state saving/loading; it will not be used for either + * migration or KVM state synchronization. (Typically this is for "registers" + * which are actually used as instructions for cache maintenance and so on.) * IO indicates that this register does I/O and therefore its accesses * need to be surrounded by gen_io_start()/gen_io_end(). In particular, * registers which implement clocks or timers require this. @@ -1121,8 +1129,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) #define ARM_CP_64BIT 4 #define ARM_CP_SUPPRESS_TB_END 8 #define ARM_CP_OVERRIDE 16 -#define ARM_CP_NO_MIGRATE 32 +#define ARM_CP_ALIAS 32 #define ARM_CP_IO 64 +#define ARM_CP_NO_RAW 128 #define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8)) #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8)) #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8)) @@ -1132,7 +1141,7 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) /* Used only as a terminator for ARMCPRegInfo lists */ #define ARM_CP_SENTINEL 0xffff /* Mask of only the flag bits in a type field */ -#define ARM_CP_FLAG_MASK 0x7f +#define ARM_CP_FLAG_MASK 0xff /* Valid values for ARMCPRegInfo state field, indicating which of * the AArch32 and AArch64 execution states this register is visible in. 
@@ -1211,6 +1220,10 @@ static inline bool cptype_valid(int cptype) */ static inline int arm_current_el(CPUARMState *env) { + if (arm_feature(env, ARM_FEATURE_M)) { + return !((env->v7m.exception == 0) && (env->v7m.control & 1)); + } + if (is_a64(env)) { return extract32(env->pstate, 2, 2); } @@ -1568,13 +1581,90 @@ static inline CPUARMState *cpu_init(const char *cpu_model) #define cpu_signal_handler cpu_arm_signal_handler #define cpu_list arm_cpu_list -/* MMU modes definitions */ -#define MMU_MODE0_SUFFIX _user -#define MMU_MODE1_SUFFIX _kernel +/* ARM has the following "translation regimes" (as the ARM ARM calls them): + * + * If EL3 is 64-bit: + * + NonSecure EL1 & 0 stage 1 + * + NonSecure EL1 & 0 stage 2 + * + NonSecure EL2 + * + Secure EL1 & EL0 + * + Secure EL3 + * If EL3 is 32-bit: + * + NonSecure PL1 & 0 stage 1 + * + NonSecure PL1 & 0 stage 2 + * + NonSecure PL2 + * + Secure PL0 & PL1 + * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) + * + * For QEMU, an mmu_idx is not quite the same as a translation regime because: + * 1. we need to split the "EL1 & 0" regimes into two mmu_idxes, because they + * may differ in access permissions even if the VA->PA map is the same + * 2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2 + * translation, which means that we have one mmu_idx that deals with two + * concatenated translation regimes [this sort of combined s1+2 TLB is + * architecturally permitted] + * 3. we don't need to allocate an mmu_idx to translations that we won't be + * handling via the TLB. The only way to do a stage 1 translation without + * the immediate stage 2 translation is via the ATS or AT system insns, + * which can be slow-pathed and always do a page table walk. + * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" + * translation regimes, because they map reasonably well to each other + * and they can't both be active at the same time. 
+ * This gives us the following list of mmu_idx values: + * + * NS EL0 (aka NS PL0) stage 1+2 + * NS EL1 (aka NS PL1) stage 1+2 + * NS EL2 (aka NS PL2) + * S EL3 (aka S PL1) + * S EL0 (aka S PL0) + * S EL1 (not used if EL3 is 32 bit) + * NS EL0+1 stage 2 + * + * (The last of these is an mmu_idx because we want to be able to use the TLB + * for the accesses done as part of a stage 1 page table walk, rather than + * having to walk the stage 2 page table over and over.) + * + * Our enumeration includes at the end some entries which are not "true" + * mmu_idx values in that they don't have corresponding TLBs and are only + * valid for doing slow path page table walks. + * + * The constant names here are patterned after the general style of the names + * of the AT/ATS operations. + * The values used are carefully arranged to make mmu_idx => EL lookup easy. + */ +typedef enum ARMMMUIdx { + ARMMMUIdx_S12NSE0 = 0, + ARMMMUIdx_S12NSE1 = 1, + ARMMMUIdx_S1E2 = 2, + ARMMMUIdx_S1E3 = 3, + ARMMMUIdx_S1SE0 = 4, + ARMMMUIdx_S1SE1 = 5, + ARMMMUIdx_S2NS = 6, + /* Indexes below here don't have TLBs and are used only for AT system + * instructions or for the first stage of an S12 page table walk. 
+ */ + ARMMMUIdx_S1NSE0 = 7, + ARMMMUIdx_S1NSE1 = 8, +} ARMMMUIdx; + #define MMU_USER_IDX 0 -static inline int cpu_mmu_index (CPUARMState *env) + +/* Return the exception level we're running at if this is our mmu_idx */ +static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) { - return arm_current_el(env); + assert(mmu_idx < ARMMMUIdx_S2NS); + return mmu_idx & 3; +} + +/* Determine the current mmu_idx to use for normal loads/stores */ +static inline int cpu_mmu_index(CPUARMState *env) +{ + int el = arm_current_el(env); + + if (el < 2 && arm_is_secure_below_el3(env)) { + return ARMMMUIdx_S1SE0 + el; + } + return el; } /* Return the Exception Level targeted by debug exceptions; @@ -1641,9 +1731,13 @@ static inline bool arm_singlestep_active(CPUARMState *env) /* Bit usage in the TB flags field: bit 31 indicates whether we are * in 32 or 64 bit mode. The meaning of the other bits depends on that. + * We put flags which are shared between 32 and 64 bit mode at the top + * of the word, and flags which apply to only one mode at the bottom. 
*/ #define ARM_TBFLAG_AARCH64_STATE_SHIFT 31 #define ARM_TBFLAG_AARCH64_STATE_MASK (1U << ARM_TBFLAG_AARCH64_STATE_SHIFT) +#define ARM_TBFLAG_MMUIDX_SHIFT 28 +#define ARM_TBFLAG_MMUIDX_MASK (0x7 << ARM_TBFLAG_MMUIDX_SHIFT) /* Bit usage when in AArch32 state: */ #define ARM_TBFLAG_THUMB_SHIFT 0 @@ -1652,8 +1746,6 @@ static inline bool arm_singlestep_active(CPUARMState *env) #define ARM_TBFLAG_VECLEN_MASK (0x7 << ARM_TBFLAG_VECLEN_SHIFT) #define ARM_TBFLAG_VECSTRIDE_SHIFT 4 #define ARM_TBFLAG_VECSTRIDE_MASK (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT) -#define ARM_TBFLAG_PRIV_SHIFT 6 -#define ARM_TBFLAG_PRIV_MASK (1 << ARM_TBFLAG_PRIV_SHIFT) #define ARM_TBFLAG_VFPEN_SHIFT 7 #define ARM_TBFLAG_VFPEN_MASK (1 << ARM_TBFLAG_VFPEN_SHIFT) #define ARM_TBFLAG_CONDEXEC_SHIFT 8 @@ -1679,8 +1771,6 @@ static inline bool arm_singlestep_active(CPUARMState *env) #define ARM_TBFLAG_NS_MASK (1 << ARM_TBFLAG_NS_SHIFT) /* Bit usage when in AArch64 state */ -#define ARM_TBFLAG_AA64_EL_SHIFT 0 -#define ARM_TBFLAG_AA64_EL_MASK (0x3 << ARM_TBFLAG_AA64_EL_SHIFT) #define ARM_TBFLAG_AA64_FPEN_SHIFT 2 #define ARM_TBFLAG_AA64_FPEN_MASK (1 << ARM_TBFLAG_AA64_FPEN_SHIFT) #define ARM_TBFLAG_AA64_SS_ACTIVE_SHIFT 3 @@ -1691,14 +1781,14 @@ static inline bool arm_singlestep_active(CPUARMState *env) /* some convenience accessor macros */ #define ARM_TBFLAG_AARCH64_STATE(F) \ (((F) & ARM_TBFLAG_AARCH64_STATE_MASK) >> ARM_TBFLAG_AARCH64_STATE_SHIFT) +#define ARM_TBFLAG_MMUIDX(F) \ + (((F) & ARM_TBFLAG_MMUIDX_MASK) >> ARM_TBFLAG_MMUIDX_SHIFT) #define ARM_TBFLAG_THUMB(F) \ (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT) #define ARM_TBFLAG_VECLEN(F) \ (((F) & ARM_TBFLAG_VECLEN_MASK) >> ARM_TBFLAG_VECLEN_SHIFT) #define ARM_TBFLAG_VECSTRIDE(F) \ (((F) & ARM_TBFLAG_VECSTRIDE_MASK) >> ARM_TBFLAG_VECSTRIDE_SHIFT) -#define ARM_TBFLAG_PRIV(F) \ - (((F) & ARM_TBFLAG_PRIV_MASK) >> ARM_TBFLAG_PRIV_SHIFT) #define ARM_TBFLAG_VFPEN(F) \ (((F) & ARM_TBFLAG_VFPEN_MASK) >> ARM_TBFLAG_VFPEN_SHIFT) #define 
ARM_TBFLAG_CONDEXEC(F) \ @@ -1713,8 +1803,6 @@ static inline bool arm_singlestep_active(CPUARMState *env) (((F) & ARM_TBFLAG_PSTATE_SS_MASK) >> ARM_TBFLAG_PSTATE_SS_SHIFT) #define ARM_TBFLAG_XSCALE_CPAR(F) \ (((F) & ARM_TBFLAG_XSCALE_CPAR_MASK) >> ARM_TBFLAG_XSCALE_CPAR_SHIFT) -#define ARM_TBFLAG_AA64_EL(F) \ - (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT) #define ARM_TBFLAG_AA64_FPEN(F) \ (((F) & ARM_TBFLAG_AA64_FPEN_MASK) >> ARM_TBFLAG_AA64_FPEN_SHIFT) #define ARM_TBFLAG_AA64_SS_ACTIVE(F) \ @@ -1738,8 +1826,7 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, if (is_a64(env)) { *pc = env->pc; - *flags = ARM_TBFLAG_AARCH64_STATE_MASK - | (arm_current_el(env) << ARM_TBFLAG_AA64_EL_SHIFT); + *flags = ARM_TBFLAG_AARCH64_STATE_MASK; if (fpen == 3 || (fpen == 1 && arm_current_el(env) != 0)) { *flags |= ARM_TBFLAG_AA64_FPEN_MASK; } @@ -1757,21 +1844,12 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, } } } else { - int privmode; *pc = env->regs[15]; *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT) | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT) | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT) | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT); - if (arm_feature(env, ARM_FEATURE_M)) { - privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1)); - } else { - privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR; - } - if (privmode) { - *flags |= ARM_TBFLAG_PRIV_MASK; - } if (!(access_secure_reg(env))) { *flags |= ARM_TBFLAG_NS_MASK; } @@ -1799,6 +1877,8 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, << ARM_TBFLAG_XSCALE_CPAR_SHIFT); } + *flags |= (cpu_mmu_index(env) << ARM_TBFLAG_MMUIDX_SHIFT); + *cs_base = 0; } diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index 81066ca936..8aa40e9763 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -135,6 +135,9 @@ float32 
HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp) { float_status *fpst = fpstp; + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + if ((float32_is_zero(a) && float32_is_infinity(b)) || (float32_is_infinity(a) && float32_is_zero(b))) { /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ @@ -148,6 +151,9 @@ float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp) { float_status *fpst = fpstp; + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + if ((float64_is_zero(a) && float64_is_infinity(b)) || (float64_is_infinity(a) && float64_is_zero(b))) { /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ @@ -223,6 +229,9 @@ float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp) { float_status *fpst = fpstp; + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + a = float32_chs(a); if ((float32_is_infinity(a) && float32_is_zero(b)) || (float32_is_infinity(b) && float32_is_zero(a))) { @@ -235,6 +244,9 @@ float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp) { float_status *fpst = fpstp; + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + a = float64_chs(a); if ((float64_is_infinity(a) && float64_is_zero(b)) || (float64_is_infinity(b) && float64_is_zero(a))) { @@ -247,6 +259,9 @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp) { float_status *fpst = fpstp; + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + a = float32_chs(a); if ((float32_is_infinity(a) && float32_is_zero(b)) || (float32_is_infinity(b) && float32_is_zero(a))) { @@ -259,6 +274,9 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) { float_status *fpst = fpstp; + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + a = float64_chs(a); if ((float64_is_infinity(a) && float64_is_zero(b)) || (float64_is_infinity(b) && 
float64_is_zero(a))) { diff --git a/target-arm/helper.c b/target-arm/helper.c index 1a5e0678b0..1a1a00577e 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -13,7 +13,7 @@ #ifndef CONFIG_USER_ONLY static inline int get_phys_addr(CPUARMState *env, target_ulong address, - int access_type, int is_user, + int access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, int *prot, target_ulong *page_size); @@ -119,6 +119,7 @@ static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) { + assert(ri->fieldoffset); if (cpreg_field_is_64bit(ri)) { return CPREG_FIELD64(env, ri); } else { @@ -129,6 +130,7 @@ static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + assert(ri->fieldoffset); if (cpreg_field_is_64bit(ri)) { CPREG_FIELD64(env, ri) = value; } else { @@ -174,6 +176,27 @@ static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, } } +static bool raw_accessors_invalid(const ARMCPRegInfo *ri) +{ + /* Return true if the regdef would cause an assertion if you called + * read_raw_cp_reg() or write_raw_cp_reg() on it (ie if it is a + * program bug for it not to have the NO_RAW flag). + * NB that returning false here doesn't necessarily mean that calling + * read/write_raw_cp_reg() is safe, because we can't distinguish "has + * read/write access functions which are safe for raw use" from "has + * read/write access functions which have side effects but has forgotten + * to provide raw access functions". + * The tests here line up with the conditions in read/write_raw_cp_reg() + * and assertions in raw_read()/raw_write(). 
+ */ + if ((ri->type & ARM_CP_CONST) || + ri->fieldoffset || + ((ri->raw_writefn || ri->writefn) && (ri->raw_readfn || ri->readfn))) { + return false; + } + return true; +} + bool write_cpustate_to_list(ARMCPU *cpu) { /* Write the coprocessor state from cpu->env to the (index,value) list. */ @@ -189,7 +212,7 @@ bool write_cpustate_to_list(ARMCPU *cpu) ok = false; continue; } - if (ri->type & ARM_CP_NO_MIGRATE) { + if (ri->type & ARM_CP_NO_RAW) { continue; } cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri); @@ -212,7 +235,7 @@ bool write_list_to_cpustate(ARMCPU *cpu) ok = false; continue; } - if (ri->type & ARM_CP_NO_MIGRATE) { + if (ri->type & ARM_CP_NO_RAW) { continue; } /* Write value and confirm it reads back as written @@ -236,7 +259,7 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque) regidx = *(uint32_t *)key; ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); - if (!(ri->type & ARM_CP_NO_MIGRATE)) { + if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx); /* The value array need not be initialized at this point */ cpu->cpreg_array_len++; @@ -252,7 +275,7 @@ static void count_cpreg(gpointer key, gpointer opaque) regidx = *(uint32_t *)key; ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); - if (!(ri->type & ARM_CP_NO_MIGRATE)) { + if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { cpu->cpreg_array_len++; } } @@ -508,7 +531,7 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = { .resetvalue = 0 }, /* v6 doesn't have the cache ID registers but Linux reads them anyway */ { .name = "DUMMY", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = CP_ANY, - .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_MIGRATE, + .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = 0 }, /* We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR; * implementing it as RAZ means the "debug architecture version" bits @@ -522,16 +545,16 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = { */ 
{ .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write, - .type = ARM_CP_NO_MIGRATE }, + .type = ARM_CP_NO_RAW }, { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write, - .type = ARM_CP_NO_MIGRATE }, + .type = ARM_CP_NO_RAW }, { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write, - .type = ARM_CP_NO_MIGRATE }, + .type = ARM_CP_NO_RAW }, { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write, - .type = ARM_CP_NO_MIGRATE }, + .type = ARM_CP_NO_RAW }, REGINFO_SENTINEL }; @@ -854,7 +877,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { * or PL0_RO as appropriate and then check PMUSERENR in the helper fn. */ { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_NO_MIGRATE, + .access = PL0_RW, .type = ARM_CP_ALIAS, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), .writefn = pmcntenset_write, .accessfn = pmreg_access, @@ -869,11 +892,11 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), .accessfn = pmreg_access, .writefn = pmcntenclr_write, - .type = ARM_CP_NO_MIGRATE }, + .type = ARM_CP_ALIAS }, { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, .access = PL0_RW, .accessfn = pmreg_access, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .writefn = pmcntenclr_write }, { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, @@ -928,7 +951,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .resetvalue = 0, .writefn = pmintenset_write, .raw_writefn = raw_write }, { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 
2, - .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, + .access = PL1_RW, .type = ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .resetvalue = 0, .writefn = pmintenclr_write, }, { .name = "VBAR", .state = ARM_CP_STATE_BOTH, @@ -939,7 +962,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .resetvalue = 0 }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, - .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_MIGRATE }, + .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_RAW }, { .name = "CSSELR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0, .access = PL1_RW, .writefn = csselr_write, .resetvalue = 0, @@ -988,44 +1011,44 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .resetfn = arm_cp_reset_ignore }, { .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE, .access = PL1_R, .readfn = isr_read }, + .type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read }, /* 32 bit ITLB invalidates */ { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write }, { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write }, /* 32 bit DTLB invalidates */ { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write }, 
{ .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write }, /* 32 bit TLB invalidates */ { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write }, { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write }, { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_write }, REGINFO_SENTINEL }; static const ARMCPRegInfo v7mp_cp_reginfo[] = { /* 32 bit TLB invalidates, Inner Shareable */ { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_is_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_is_write }, { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_is_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_is_write }, { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .type = 
ARM_CP_NO_MIGRATE, .access = PL1_W, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_is_write }, { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_is_write }, REGINFO_SENTINEL }; @@ -1268,7 +1291,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { * Our reset value matches the fixed frequency we implement the timer at. */ { .name = "CNTFRQ", .cp = 15, .crn = 14, .crm = 0, .opc1 = 0, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access, .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_cntfrq), .resetfn = arm_cp_reset_ignore, @@ -1288,7 +1311,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { }, /* per-timer control */ { .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1, - .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R, + .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R, .accessfn = gt_ptimer_access, .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl), @@ -1304,7 +1327,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .writefn = gt_ctl_write, .raw_writefn = raw_write, }, { .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1, - .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R, + .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R, .accessfn = gt_vtimer_access, .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl), @@ -1321,52 +1344,52 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { }, /* TimerValue views: a 32 bit downcounting view of the underlying state */ { .name = "CNTP_TVAL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, .accessfn = 
gt_ptimer_access, .readfn = gt_tval_read, .writefn = gt_tval_write, }, { .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, .readfn = gt_tval_read, .writefn = gt_tval_write, }, { .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, .accessfn = gt_vtimer_access, .readfn = gt_tval_read, .writefn = gt_tval_write, }, { .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, .readfn = gt_tval_read, .writefn = gt_tval_write, }, /* The counter itself */ { .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0, - .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO, + .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = gt_pct_access, .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore, }, { .name = "CNTPCT_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 1, - .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, + .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = gt_pct_access, .readfn = gt_cnt_read, .resetfn = gt_cnt_reset, }, { .name = "CNTVCT", .cp = 15, .crm = 14, .opc1 = 1, - .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO, + .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = gt_vct_access, .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore, }, { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2, - .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, + .access = 
PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = gt_vct_access, .readfn = gt_cnt_read, .resetfn = gt_cnt_reset, }, /* Comparison value, indicating when the timer goes off */ { .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2, .access = PL1_RW | PL0_R, - .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE, + .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval), .accessfn = gt_ptimer_access, .resetfn = arm_cp_reset_ignore, .writefn = gt_cval_write, .raw_writefn = raw_write, @@ -1381,7 +1404,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { }, { .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3, .access = PL1_RW | PL0_R, - .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE, + .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval), .accessfn = gt_vtimer_access, .resetfn = arm_cp_reset_ignore, .writefn = gt_cval_write, .raw_writefn = raw_write, @@ -1428,23 +1451,23 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri) /* Other states are only available with TrustZone; in * a non-TZ implementation these registers don't exist * at all, which is an Uncategorized trap. This underdecoding - * is safe because the reginfo is NO_MIGRATE. + * is safe because the reginfo is NO_RAW. 
*/ return CP_ACCESS_TRAP_UNCATEGORIZED; } return CP_ACCESS_OK; } -static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static uint64_t do_ats_write(CPUARMState *env, uint64_t value, + int access_type, ARMMMUIdx mmu_idx) { hwaddr phys_addr; target_ulong page_size; int prot; - int ret, is_user = ri->opc2 & 2; - int access_type = ri->opc2 & 1; + int ret; uint64_t par64; - ret = get_phys_addr(env, value, access_type, is_user, + ret = get_phys_addr(env, value, access_type, mmu_idx, &phys_addr, &prot, &page_size); if (extended_addresses_enabled(env)) { /* ret is a DFSR/IFSR value for the long descriptor @@ -1481,9 +1504,105 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) ((ret & 0xf) << 1) | 1; } } + return par64; +} + +static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + int access_type = ri->opc2 & 1; + uint64_t par64; + ARMMMUIdx mmu_idx; + int el = arm_current_el(env); + bool secure = arm_is_secure_below_el3(env); + + switch (ri->opc2 & 6) { + case 0: + /* stage 1 current state PL1: ATS1CPR, ATS1CPW */ + switch (el) { + case 3: + mmu_idx = ARMMMUIdx_S1E3; + break; + case 2: + mmu_idx = ARMMMUIdx_S1NSE1; + break; + case 1: + mmu_idx = secure ? ARMMMUIdx_S1SE1 : ARMMMUIdx_S1NSE1; + break; + default: + g_assert_not_reached(); + } + break; + case 2: + /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ + switch (el) { + case 3: + mmu_idx = ARMMMUIdx_S1SE0; + break; + case 2: + mmu_idx = ARMMMUIdx_S1NSE0; + break; + case 1: + mmu_idx = secure ? 
ARMMMUIdx_S1SE0 : ARMMMUIdx_S1NSE0; + break; + default: + g_assert_not_reached(); + } + break; + case 4: + /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ + mmu_idx = ARMMMUIdx_S12NSE1; + break; + case 6: + /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ + mmu_idx = ARMMMUIdx_S12NSE0; + break; + default: + g_assert_not_reached(); + } + + par64 = do_ats_write(env, value, access_type, mmu_idx); A32_BANKED_CURRENT_REG_SET(env, par, par64); } + +static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + int access_type = ri->opc2 & 1; + ARMMMUIdx mmu_idx; + int secure = arm_is_secure_below_el3(env); + + switch (ri->opc2 & 6) { + case 0: + switch (ri->opc1) { + case 0: /* AT S1E1R, AT S1E1W */ + mmu_idx = secure ? ARMMMUIdx_S1SE1 : ARMMMUIdx_S1NSE1; + break; + case 4: /* AT S1E2R, AT S1E2W */ + mmu_idx = ARMMMUIdx_S1E2; + break; + case 6: /* AT S1E3R, AT S1E3W */ + mmu_idx = ARMMMUIdx_S1E3; + break; + default: + g_assert_not_reached(); + } + break; + case 2: /* AT S1E0R, AT S1E0W */ + mmu_idx = secure ? 
ARMMMUIdx_S1SE0 : ARMMMUIdx_S1NSE0; + break; + case 4: /* AT S12E1R, AT S12E1W */ + mmu_idx = ARMMMUIdx_S12NSE1; + break; + case 6: /* AT S12E0R, AT S12E0W */ + mmu_idx = ARMMMUIdx_S12NSE0; + break; + default: + g_assert_not_reached(); + } + + env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx); +} #endif static const ARMCPRegInfo vapa_cp_reginfo[] = { @@ -1495,7 +1614,7 @@ static const ARMCPRegInfo vapa_cp_reginfo[] = { #ifndef CONFIG_USER_ONLY { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, .access = PL1_W, .accessfn = ats_access, - .writefn = ats_write, .type = ARM_CP_NO_MIGRATE }, + .writefn = ats_write, .type = ARM_CP_NO_RAW }, #endif REGINFO_SENTINEL }; @@ -1554,12 +1673,12 @@ static uint64_t pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri) static const ARMCPRegInfo pmsav5_cp_reginfo[] = { { .name = "DATA_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, + .access = PL1_RW, .type = ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_data_ap), .resetvalue = 0, .readfn = pmsav5_data_ap_read, .writefn = pmsav5_data_ap_write, }, { .name = "INSN_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1, - .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, + .access = PL1_RW, .type = ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_insn_ap), .resetvalue = 0, .readfn = pmsav5_insn_ap_read, .writefn = pmsav5_insn_ap_write, }, @@ -1691,7 +1810,7 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, static const ARMCPRegInfo vmsa_cp_reginfo[] = { { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, + .access = PL1_RW, .type = ARM_CP_ALIAS, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dfsr_s), offsetoflow32(CPUARMState, cp15.dfsr_ns) }, .resetfn = arm_cp_reset_ignore, }, @@ -1719,7 +1838,7 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .resetfn = 
vmsa_ttbcr_reset, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[1]) }, { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, - .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write, + .access = PL1_RW, .type = ARM_CP_ALIAS, .writefn = vmsa_ttbcr_write, .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tcr_el[3]), offsetoflow32(CPUARMState, cp15.tcr_el[1])} }, @@ -1789,7 +1908,7 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { .writefn = omap_threadid_write }, { .name = "TI925T_STATUS", .cp = 15, .crn = 15, .crm = 8, .opc1 = 0, .opc2 = 0, .access = PL1_RW, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_NO_RAW, .readfn = arm_cp_read_zero, .writefn = omap_wfi_write, }, /* TODO: Peripheral port remap register: * On OMAP2 mcr p15, 0, rn, c15, c2, 4 sets up the interrupt controller @@ -1798,7 +1917,7 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { */ { .name = "OMAP_CACHEMAINT", .cp = 15, .crn = 7, .crm = CP_ANY, .opc1 = 0, .opc2 = CP_ANY, .access = PL1_W, - .type = ARM_CP_OVERRIDE | ARM_CP_NO_MIGRATE, + .type = ARM_CP_OVERRIDE | ARM_CP_NO_RAW, .writefn = omap_cachemaint_write }, { .name = "C9", .cp = 15, .crn = 9, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, @@ -1848,7 +1967,7 @@ static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { { .name = "C15_IMPDEF", .cp = 15, .crn = 15, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, - .type = ARM_CP_CONST | ARM_CP_NO_MIGRATE | ARM_CP_OVERRIDE, + .type = ARM_CP_CONST | ARM_CP_NO_RAW | ARM_CP_OVERRIDE, .resetvalue = 0 }, REGINFO_SENTINEL }; @@ -1856,7 +1975,7 @@ static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { static const ARMCPRegInfo cache_dirty_status_cp_reginfo[] = { /* Cache status: RAZ because we have no cache so it's always clean */ { .name = "CDSR", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 6, - .access = PL1_R, .type = ARM_CP_CONST | 
ARM_CP_NO_MIGRATE, + .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = 0 }, REGINFO_SENTINEL }; @@ -1864,7 +1983,7 @@ static const ARMCPRegInfo cache_dirty_status_cp_reginfo[] = { static const ARMCPRegInfo cache_block_ops_cp_reginfo[] = { /* We never have a a block transfer operation in progress */ { .name = "BXSR", .cp = 15, .crn = 7, .crm = 12, .opc1 = 0, .opc2 = 4, - .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_MIGRATE, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = 0 }, /* The cache ops themselves: these all NOP for QEMU */ { .name = "IICR", .cp = 15, .crm = 5, .opc1 = 0, @@ -1887,10 +2006,10 @@ static const ARMCPRegInfo cache_test_clean_cp_reginfo[] = { * to indicate that there are no dirty cache lines. */ { .name = "TC_DCACHE", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 3, - .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_MIGRATE, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = (1 << 30) }, { .name = "TCI_DCACHE", .cp = 15, .crn = 7, .crm = 14, .opc1 = 0, .opc2 = 3, - .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_MIGRATE, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = (1 << 30) }, REGINFO_SENTINEL }; @@ -1900,7 +2019,7 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = { { .name = "C9_READBUFFER", .cp = 15, .crn = 9, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, - .type = ARM_CP_CONST | ARM_CP_OVERRIDE | ARM_CP_NO_MIGRATE }, + .type = ARM_CP_CONST | ARM_CP_OVERRIDE | ARM_CP_NO_RAW }, REGINFO_SENTINEL }; @@ -1926,7 +2045,7 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri) static const ARMCPRegInfo mpidr_cp_reginfo[] = { { .name = "MPIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, - .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_MIGRATE }, + .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW }, REGINFO_SENTINEL }; @@ -1947,12 +2066,12 @@ 
static const ARMCPRegInfo lpae_cp_reginfo[] = { .bank_fieldoffsets = { offsetof(CPUARMState, cp15.par_s), offsetof(CPUARMState, cp15.par_ns)} }, { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0, - .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE, + .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) }, .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore }, { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1, - .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE, + .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) }, .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore }, @@ -2144,7 +2263,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .access = PL0_RW, .type = ARM_CP_NZCV }, { .name = "DAIF", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 2, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_NO_RAW, .access = PL0_RW, .accessfn = aa64_daif_access, .fieldoffset = offsetof(CPUARMState, daif), .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, @@ -2156,7 +2275,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write }, { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0, - .access = PL0_R, .type = ARM_CP_NO_MIGRATE, + .access = PL0_R, .type = ARM_CP_NO_RAW, .readfn = aa64_dczid_read }, { .name = "DC_ZVA", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1, @@ -2207,77 +2326,77 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { /* TLBI operations */ { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = 
tlbiall_is_write }, { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_va_is_write }, { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_asid_is_write }, { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vaa_is_write }, { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_va_is_write }, { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vaa_is_write }, { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbiall_write }, { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_va_write }, { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_asid_write }, { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, - .access = PL1_W, .type = 
ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vaa_write }, { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_va_write }, { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vaa_write }, #ifndef CONFIG_USER_ONLY /* 64 bit address translation operations */ { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write }, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write }, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write }, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, - .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write }, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, #endif /* TLB invalidate last level of translation table walk */ { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_is_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_is_write }, { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, + 
.type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_is_write }, { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, - .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write }, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_write }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W }, @@ -2312,12 +2431,12 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s), offsetoflow32(CPUARMState, cp15.dacr_ns) } }, { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, elr_el[1]) }, { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[0]) }, /* We rely on the access checks not allowing the guest to write to the @@ -2327,11 +2446,15 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "SP_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 1, .opc2 = 0, .access = PL1_RW, .accessfn = sp_el0_access, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, sp_el[0]) }, + { .name = "SP_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 1, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, sp_el[1]) }, { .name = "SPSel", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 0, - .type = 
ARM_CP_NO_MIGRATE, + .type = ARM_CP_NO_RAW, .access = PL1_RW, .readfn = spsel_read, .writefn = spsel_write }, REGINFO_SENTINEL }; @@ -2343,7 +2466,7 @@ static const ARMCPRegInfo v8_el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_NO_RAW, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, @@ -2386,12 +2509,12 @@ static const ARMCPRegInfo v8_el2_cp_reginfo[] = { .writefn = dacr_write, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.dacr32_el2) }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, elr_el[2]) }, { .name = "ESR_EL2", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) }, { .name = "IFSR32_EL2", .state = ARM_CP_STATE_AA64, @@ -2402,7 +2525,7 @@ static const ARMCPRegInfo v8_el2_cp_reginfo[] = { .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) }, { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[6]) }, { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64, @@ -2410,6 +2533,10 @@ static const ARMCPRegInfo v8_el2_cp_reginfo[] = { .access = PL2_RW, .writefn = vbar_write, .fieldoffset = offsetof(CPUARMState, cp15.vbar_el[2]), .resetvalue = 0 }, + { .name = "SP_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 1, .opc2 = 0, + .access = 
PL3_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, sp_el[2]) }, REGINFO_SENTINEL }; @@ -2418,7 +2545,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3), .resetvalue = 0, .writefn = scr_write }, - { .name = "SCR", .type = ARM_CP_NO_MIGRATE, + { .name = "SCR", .type = ARM_CP_ALIAS, .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3), .resetfn = arm_cp_reset_ignore, .writefn = scr_write }, @@ -2451,19 +2578,19 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[3]) }, { .name = "ELR_EL3", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 1, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, elr_el[3]) }, { .name = "ESR_EL3", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 2, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[3]) }, { .name = "FAR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 0, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[3]) }, { .name = "SPSR_EL3", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[7]) }, { .name = "VBAR_EL3", .state = ARM_CP_STATE_AA64, @@ -2510,7 +2637,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { */ { .name = "MDCCSR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0, - .type = ARM_CP_NO_MIGRATE, + .type = ARM_CP_ALIAS, .access = PL1_R, .fieldoffset = offsetof(CPUARMState, 
cp15.mdscr_el1), .resetfn = arm_cp_reset_ignore }, @@ -2963,7 +3090,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo pmcr = { .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, .access = PL0_RW, - .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, + .type = ARM_CP_IO | ARM_CP_ALIAS, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), .accessfn = pmreg_access, .writefn = pmcr_write, .raw_writefn = raw_write, @@ -3053,17 +3180,30 @@ void register_cp_regs_for_features(ARMCPU *cpu) .resetvalue = cpu->mvfr2 }, REGINFO_SENTINEL }; - ARMCPRegInfo rvbar = { - .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2, - .type = ARM_CP_CONST, .access = PL1_R, .resetvalue = cpu->rvbar - }; - define_one_arm_cp_reg(cpu, &rvbar); + /* RVBAR_EL1 is only implemented if EL1 is the highest EL */ + if (!arm_feature(env, ARM_FEATURE_EL3) && + !arm_feature(env, ARM_FEATURE_EL2)) { + ARMCPRegInfo rvbar = { + .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, + .type = ARM_CP_CONST, .access = PL1_R, .resetvalue = cpu->rvbar + }; + define_one_arm_cp_reg(cpu, &rvbar); + } define_arm_cp_regs(cpu, v8_idregs); define_arm_cp_regs(cpu, v8_cp_reginfo); } if (arm_feature(env, ARM_FEATURE_EL2)) { define_arm_cp_regs(cpu, v8_el2_cp_reginfo); + /* RVBAR_EL2 is only implemented if EL2 is the highest EL */ + if (!arm_feature(env, ARM_FEATURE_EL3)) { + ARMCPRegInfo rvbar = { + .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1, + .type = ARM_CP_CONST, .access = PL2_R, .resetvalue = cpu->rvbar + }; + define_one_arm_cp_reg(cpu, &rvbar); + } } else { /* If EL2 is missing but higher ELs are enabled, we need to * register the no_el2 reginfos. 
@@ -3074,6 +3214,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) } if (arm_feature(env, ARM_FEATURE_EL3)) { define_arm_cp_regs(cpu, el3_cp_reginfo); + ARMCPRegInfo rvbar = { + .name = "RVBAR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 1, + .type = ARM_CP_CONST, .access = PL3_R, .resetvalue = cpu->rvbar + }; + define_one_arm_cp_reg(cpu, &rvbar); } if (arm_feature(env, ARM_FEATURE_MPU)) { /* These are the MPU registers prior to PMSAv6. Any new @@ -3440,14 +3586,14 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, */ if ((r->state == ARM_CP_STATE_BOTH && ns) || (arm_feature(&cpu->env, ARM_FEATURE_V8) && !ns)) { - r2->type |= ARM_CP_NO_MIGRATE; + r2->type |= ARM_CP_ALIAS; r2->resetfn = arm_cp_reset_ignore; } } else if ((secstate != r->secure) && !ns) { /* The register is not banked so we only want to allow migration of * the non-secure instance. */ - r2->type |= ARM_CP_NO_MIGRATE; + r2->type |= ARM_CP_ALIAS; r2->resetfn = arm_cp_reset_ignore; } @@ -3496,15 +3642,25 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, r2->opc2 = opc2; /* By convention, for wildcarded registers only the first * entry is used for migration; the others are marked as - * NO_MIGRATE so we don't try to transfer the register + * ALIAS so we don't try to transfer the register * multiple times. Special registers (ie NOP/WFI) are - * never migratable. + * never migratable and not even raw-accessible. */ - if ((r->type & ARM_CP_SPECIAL) || - ((r->crm == CP_ANY) && crm != 0) || + if ((r->type & ARM_CP_SPECIAL)) { + r2->type |= ARM_CP_NO_RAW; + } + if (((r->crm == CP_ANY) && crm != 0) || ((r->opc1 == CP_ANY) && opc1 != 0) || ((r->opc2 == CP_ANY) && opc2 != 0)) { - r2->type |= ARM_CP_NO_MIGRATE; + r2->type |= ARM_CP_ALIAS; + } + + /* Check that raw accesses are either forbidden or handled. 
Note that + * we can't assert this earlier because the setup of fieldoffset for + * banked registers has to be done first. + */ + if (!(r2->type & ARM_CP_NO_RAW)) { + assert(!raw_accessors_invalid(r2)); } /* Overriding of an existing definition must be explicitly @@ -4460,91 +4616,170 @@ void arm_cpu_do_interrupt(CPUState *cs) cs->interrupt_request |= CPU_INTERRUPT_EXITTB; } + +/* Return the exception level which controls this address translation regime */ +static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_S2NS: + case ARMMMUIdx_S1E2: + return 2; + case ARMMMUIdx_S1E3: + return 3; + case ARMMMUIdx_S1SE0: + return arm_el_is_aa64(env, 3) ? 1 : 3; + case ARMMMUIdx_S1SE1: + case ARMMMUIdx_S1NSE0: + case ARMMMUIdx_S1NSE1: + return 1; + default: + g_assert_not_reached(); + } +} + +/* Return the SCTLR value which controls this address translation regime */ +static inline uint32_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; +} + +/* Return true if the specified stage of address translation is disabled */ +static inline bool regime_translation_disabled(CPUARMState *env, + ARMMMUIdx mmu_idx) +{ + if (mmu_idx == ARMMMUIdx_S2NS) { + return (env->cp15.hcr_el2 & HCR_VM) == 0; + } + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; +} + +/* Return the TCR controlling this translation regime */ +static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + if (mmu_idx == ARMMMUIdx_S2NS) { + /* TODO: return VTCR_EL2 */ + g_assert_not_reached(); + } + return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; +} + +/* Return true if the translation regime is using LPAE format page tables */ +static inline bool regime_using_lpae_format(CPUARMState *env, + ARMMMUIdx mmu_idx) +{ + int el = regime_el(env, mmu_idx); + if (el == 2 || arm_el_is_aa64(env, el)) { + return true; + } + if (arm_feature(env, ARM_FEATURE_LPAE) + && (regime_tcr(env, mmu_idx)->raw_tcr & 
TTBCR_EAE)) { + return true; + } + return false; +} + +static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_S1SE0: + case ARMMMUIdx_S1NSE0: + return true; + default: + return false; + case ARMMMUIdx_S12NSE0: + case ARMMMUIdx_S12NSE1: + g_assert_not_reached(); + } +} + /* Check section/page access permissions. Returns the page protection flags, or zero if the access is not permitted. */ -static inline int check_ap(CPUARMState *env, int ap, int domain_prot, - int access_type, int is_user) -{ - int prot_ro; - - if (domain_prot == 3) { - return PAGE_READ | PAGE_WRITE; - } - - if (access_type == 1) - prot_ro = 0; - else - prot_ro = PAGE_READ; - - switch (ap) { - case 0: - if (arm_feature(env, ARM_FEATURE_V7)) { - return 0; - } - if (access_type == 1) - return 0; - switch (A32_BANKED_CURRENT_REG_GET(env, sctlr) & (SCTLR_S | SCTLR_R)) { - case SCTLR_S: - return is_user ? 0 : PAGE_READ; - case SCTLR_R: - return PAGE_READ; - default: - return 0; - } - case 1: - return is_user ? 0 : PAGE_READ | PAGE_WRITE; - case 2: - if (is_user) - return prot_ro; - else - return PAGE_READ | PAGE_WRITE; - case 3: - return PAGE_READ | PAGE_WRITE; - case 4: /* Reserved. */ - return 0; - case 5: - return is_user ? 0 : prot_ro; - case 6: - return prot_ro; - case 7: - if (!arm_feature (env, ARM_FEATURE_V6K)) - return 0; - return prot_ro; - default: - abort(); - } -} - -static bool get_level1_table_address(CPUARMState *env, uint32_t *table, - uint32_t address) -{ - /* Get the TCR bank based on our security state */ - TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1]; - - /* We only get here if EL1 is running in AArch32. If EL3 is running in - * AArch32 there is a secure and non-secure instance of the translation - * table registers. 
- */ +static inline int check_ap(CPUARMState *env, ARMMMUIdx mmu_idx, + int ap, int domain_prot, + int access_type) +{ + int prot_ro; + bool is_user = regime_is_user(env, mmu_idx); + + if (domain_prot == 3) { + return PAGE_READ | PAGE_WRITE; + } + + if (access_type == 1) { + prot_ro = 0; + } else { + prot_ro = PAGE_READ; + } + + switch (ap) { + case 0: + if (arm_feature(env, ARM_FEATURE_V7)) { + return 0; + } + if (access_type == 1) { + return 0; + } + switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) { + case SCTLR_S: + return is_user ? 0 : PAGE_READ; + case SCTLR_R: + return PAGE_READ; + default: + return 0; + } + case 1: + return is_user ? 0 : PAGE_READ | PAGE_WRITE; + case 2: + if (is_user) { + return prot_ro; + } else { + return PAGE_READ | PAGE_WRITE; + } + case 3: + return PAGE_READ | PAGE_WRITE; + case 4: /* Reserved. */ + return 0; + case 5: + return is_user ? 0 : prot_ro; + case 6: + return prot_ro; + case 7: + if (!arm_feature(env, ARM_FEATURE_V6K)) { + return 0; + } + return prot_ro; + default: + abort(); + } +} + +static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, + uint32_t *table, uint32_t address) +{ + /* Note that we can only get here for an AArch32 PL0/PL1 lookup */ + int el = regime_el(env, mmu_idx); + TCR *tcr = regime_tcr(env, mmu_idx); + if (address & tcr->mask) { if (tcr->raw_tcr & TTBCR_PD1) { /* Translation table walk disabled for TTBR1 */ return false; } - *table = A32_BANKED_CURRENT_REG_GET(env, ttbr1) & 0xffffc000; + *table = env->cp15.ttbr1_el[el] & 0xffffc000; } else { if (tcr->raw_tcr & TTBCR_PD0) { /* Translation table walk disabled for TTBR0 */ return false; } - *table = A32_BANKED_CURRENT_REG_GET(env, ttbr0) & tcr->base_mask; + *table = env->cp15.ttbr0_el[el] & tcr->base_mask; } *table |= (address >> 18) & 0x3ffc; return true; } static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, - int is_user, hwaddr *phys_ptr, + ARMMMUIdx mmu_idx, hwaddr *phys_ptr, int *prot, 
target_ulong *page_size) { CPUState *cs = CPU(arm_env_get_cpu(env)); @@ -4556,10 +4791,11 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, int domain = 0; int domain_prot; hwaddr phys_addr; + uint32_t dacr; /* Pagetable walk. */ /* Lookup l1 descriptor. */ - if (!get_level1_table_address(env, &table, address)) { + if (!get_level1_table_address(env, mmu_idx, &table, address)) { /* Section translation fault if page walk is disabled by PD0 or PD1 */ code = 5; goto do_fault; @@ -4567,7 +4803,12 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, desc = ldl_phys(cs->as, table); type = (desc & 3); domain = (desc >> 5) & 0x0f; - domain_prot = (A32_BANKED_CURRENT_REG_GET(env, dacr) >> (domain * 2)) & 3; + if (regime_el(env, mmu_idx) == 1) { + dacr = env->cp15.dacr_ns; + } else { + dacr = env->cp15.dacr_s; + } + domain_prot = (dacr >> (domain * 2)) & 3; if (type == 0) { /* Section translation fault. */ code = 5; @@ -4588,13 +4829,13 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, *page_size = 1024 * 1024; } else { /* Lookup l2 entry. */ - if (type == 1) { - /* Coarse pagetable. */ - table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); - } else { - /* Fine pagetable. */ - table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); - } + if (type == 1) { + /* Coarse pagetable. */ + table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); + } else { + /* Fine pagetable. */ + table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); + } desc = ldl_phys(cs->as, table); switch (desc & 3) { case 0: /* Page translation fault. */ @@ -4611,17 +4852,17 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, *page_size = 0x1000; break; case 3: /* 1k page. */ - if (type == 1) { - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - phys_addr = (desc & 0xfffff000) | (address & 0xfff); - } else { - /* Page translation fault. 
*/ - code = 7; - goto do_fault; - } - } else { - phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); - } + if (type == 1) { + if (arm_feature(env, ARM_FEATURE_XSCALE)) { + phys_addr = (desc & 0xfffff000) | (address & 0xfff); + } else { + /* Page translation fault. */ + code = 7; + goto do_fault; + } + } else { + phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); + } ap = (desc >> 4) & 3; *page_size = 0x400; break; @@ -4631,7 +4872,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, } code = 15; } - *prot = check_ap(env, ap, domain_prot, access_type, is_user); + *prot = check_ap(env, mmu_idx, ap, domain_prot, access_type); if (!*prot) { /* Access permission fault. */ goto do_fault; @@ -4644,7 +4885,7 @@ do_fault: } static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, - int is_user, hwaddr *phys_ptr, + ARMMMUIdx mmu_idx, hwaddr *phys_ptr, int *prot, target_ulong *page_size) { CPUState *cs = CPU(arm_env_get_cpu(env)); @@ -4658,10 +4899,11 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, int domain = 0; int domain_prot; hwaddr phys_addr; + uint32_t dacr; /* Pagetable walk. */ /* Lookup l1 descriptor. */ - if (!get_level1_table_address(env, &table, address)) { + if (!get_level1_table_address(env, mmu_idx, &table, address)) { /* Section translation fault if page walk is disabled by PD0 or PD1 */ code = 5; goto do_fault; @@ -4679,7 +4921,12 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, /* Page or Section. */ domain = (desc >> 5) & 0x0f; } - domain_prot = (A32_BANKED_CURRENT_REG_GET(env, dacr) >> (domain * 2)) & 3; + if (regime_el(env, mmu_idx) == 1) { + dacr = env->cp15.dacr_ns; + } else { + dacr = env->cp15.dacr_s; + } + domain_prot = (dacr >> (domain * 2)) & 3; if (domain_prot == 0 || domain_prot == 2) { if (type != 1) { code = 9; /* Section domain fault. 
*/ @@ -4733,20 +4980,20 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, if (domain_prot == 3) { *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; } else { - if (pxn && !is_user) { + if (pxn && !regime_is_user(env, mmu_idx)) { xn = 1; } if (xn && access_type == 2) goto do_fault; /* The simplified model uses AP[0] as an access control bit. */ - if ((A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_AFE) + if ((regime_sctlr(env, mmu_idx) & SCTLR_AFE) && (ap & 1) == 0) { /* Access flag fault. */ code = (code == 15) ? 6 : 3; goto do_fault; } - *prot = check_ap(env, ap, domain_prot, access_type, is_user); + *prot = check_ap(env, mmu_idx, ap, domain_prot, access_type); if (!*prot) { /* Access permission fault. */ goto do_fault; @@ -4771,7 +5018,7 @@ typedef enum { } MMUFaultType; static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, - int access_type, int is_user, + int access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, int *prot, target_ulong *page_size_ptr) { @@ -4791,9 +5038,17 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, int32_t granule_sz = 9; int32_t va_size = 32; int32_t tbi = 0; - TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1]; - - if (arm_el_is_aa64(env, 1)) { + bool is_user; + TCR *tcr = regime_tcr(env, mmu_idx); + + /* TODO: + * This code assumes we're either a 64-bit EL1 or a 32-bit PL1; + * it doesn't handle the different format TCR for TCR_EL2, TCR_EL3, + * and VTCR_EL2, or the fact that those regimes don't have a split + * TTBR0/TTBR1. Attribute and permission bit handling should also + * be checked when adding support for those page table walks. 
+ */ + if (arm_el_is_aa64(env, regime_el(env, mmu_idx))) { va_size = 64; if (extract64(address, 55, 1)) tbi = extract64(tcr->raw_tcr, 38, 1); @@ -4808,12 +5063,12 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32: */ uint32_t t0sz = extract32(tcr->raw_tcr, 0, 6); - if (arm_el_is_aa64(env, 1)) { + if (va_size == 64) { t0sz = MIN(t0sz, 39); t0sz = MAX(t0sz, 16); } uint32_t t1sz = extract32(tcr->raw_tcr, 16, 6); - if (arm_el_is_aa64(env, 1)) { + if (va_size == 64) { t1sz = MIN(t1sz, 39); t1sz = MAX(t1sz, 16); } @@ -4868,6 +5123,10 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, } } + /* Here we should have set up all the parameters for the translation: + * va_size, ttbr, epd, tsz, granule_sz, tbi + */ + if (epd) { /* Translation table walk disabled => Translation fault on TLB miss */ goto do_fault; @@ -4953,6 +5212,7 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, goto do_fault; } fault_type = permission_fault; + is_user = regime_is_user(env, mmu_idx); if (is_user && !(attrs & (1 << 4))) { /* Unprivileged access not enabled */ goto do_fault; @@ -4987,27 +5247,31 @@ do_fault: } static int get_phys_addr_mpu(CPUARMState *env, uint32_t address, - int access_type, int is_user, + int access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, int *prot) { int n; uint32_t mask; uint32_t base; + bool is_user = regime_is_user(env, mmu_idx); *phys_ptr = address; for (n = 7; n >= 0; n--) { - base = env->cp15.c6_region[n]; - if ((base & 1) == 0) - continue; - mask = 1 << ((base >> 1) & 0x1f); - /* Keep this shift separate from the above to avoid an - (undefined) << 32. 
*/ - mask = (mask << 1) - 1; - if (((base ^ address) & ~mask) == 0) - break; - } - if (n < 0) - return 2; + base = env->cp15.c6_region[n]; + if ((base & 1) == 0) { + continue; + } + mask = 1 << ((base >> 1) & 0x1f); + /* Keep this shift separate from the above to avoid an + (undefined) << 32. */ + mask = (mask << 1) - 1; + if (((base ^ address) & ~mask) == 0) { + break; + } + } + if (n < 0) { + return 2; + } if (access_type == 2) { mask = env->cp15.pmsav5_insn_ap; @@ -5017,31 +5281,34 @@ static int get_phys_addr_mpu(CPUARMState *env, uint32_t address, mask = (mask >> (n * 4)) & 0xf; switch (mask) { case 0: - return 1; + return 1; case 1: - if (is_user) - return 1; - *prot = PAGE_READ | PAGE_WRITE; - break; + if (is_user) { + return 1; + } + *prot = PAGE_READ | PAGE_WRITE; + break; case 2: - *prot = PAGE_READ; - if (!is_user) - *prot |= PAGE_WRITE; - break; + *prot = PAGE_READ; + if (!is_user) { + *prot |= PAGE_WRITE; + } + break; case 3: - *prot = PAGE_READ | PAGE_WRITE; - break; + *prot = PAGE_READ | PAGE_WRITE; + break; case 5: - if (is_user) - return 1; - *prot = PAGE_READ; - break; + if (is_user) { + return 1; + } + *prot = PAGE_READ; + break; case 6: - *prot = PAGE_READ; - break; + *prot = PAGE_READ; + break; default: - /* Bad permission. */ - return 1; + /* Bad permission. 
*/ + return 1; } *prot |= PAGE_EXEC; return 0; @@ -5065,44 +5332,60 @@ static int get_phys_addr_mpu(CPUARMState *env, uint32_t address, * @env: CPUARMState * @address: virtual address to get physical address for * @access_type: 0 for read, 1 for write, 2 for execute - * @is_user: 0 for privileged access, 1 for user + * @mmu_idx: MMU index indicating required translation regime * @phys_ptr: set to the physical address corresponding to the virtual address * @prot: set to the permissions for the page containing phys_ptr * @page_size: set to the size of the page containing phys_ptr */ static inline int get_phys_addr(CPUARMState *env, target_ulong address, - int access_type, int is_user, + int access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, int *prot, target_ulong *page_size) { - /* This is not entirely correct as get_phys_addr() can also be called - * from ats_write() for an address translation of a specific regime. - */ - uint32_t sctlr = A32_BANKED_CURRENT_REG_GET(env, sctlr); + if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) { + /* TODO: when we support EL2 we should here call ourselves recursively + * to do the stage 1 and then stage 2 translations. The ldl_phys + * calls for stage 1 will also need changing. + * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. + */ + assert(!arm_feature(env, ARM_FEATURE_EL2)); + mmu_idx += ARMMMUIdx_S1NSE0; + } - /* Fast Context Switch Extension. */ - if (address < 0x02000000) { - address += A32_BANKED_CURRENT_REG_GET(env, fcseidr); + /* Fast Context Switch Extension. This doesn't exist at all in v8. + * In v7 and earlier it affects all stage 1 translations. + */ + if (address < 0x02000000 && mmu_idx != ARMMMUIdx_S2NS + && !arm_feature(env, ARM_FEATURE_V8)) { + if (regime_el(env, mmu_idx) == 3) { + address += env->cp15.fcseidr_s; + } else { + address += env->cp15.fcseidr_ns; + } } - if ((sctlr & SCTLR_M) == 0) { + if (regime_translation_disabled(env, mmu_idx)) { /* MMU/MPU disabled. 
*/ *phys_ptr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; *page_size = TARGET_PAGE_SIZE; return 0; - } else if (arm_feature(env, ARM_FEATURE_MPU)) { + } + + if (arm_feature(env, ARM_FEATURE_MPU)) { *page_size = TARGET_PAGE_SIZE; - return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr, - prot); - } else if (extended_addresses_enabled(env)) { - return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr, + return get_phys_addr_mpu(env, address, access_type, mmu_idx, phys_ptr, + prot); + } + + if (regime_using_lpae_format(env, mmu_idx)) { + return get_phys_addr_lpae(env, address, access_type, mmu_idx, phys_ptr, prot, page_size); - } else if (sctlr & SCTLR_XP) { - return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr, + } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { + return get_phys_addr_v6(env, address, access_type, mmu_idx, phys_ptr, prot, page_size); } else { - return get_phys_addr_v5(env, address, access_type, is_user, phys_ptr, + return get_phys_addr_v5(env, address, access_type, mmu_idx, phys_ptr, prot, page_size); } } @@ -5115,12 +5398,11 @@ int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, hwaddr phys_addr; target_ulong page_size; int prot; - int ret, is_user; + int ret; uint32_t syn; bool same_el = (arm_current_el(env) != 0); - is_user = mmu_idx == MMU_USER_IDX; - ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot, + ret = get_phys_addr(env, address, access_type, mmu_idx, &phys_addr, &prot, &page_size); if (ret == 0) { /* Map a single [sub]page. 
*/ @@ -5156,12 +5438,14 @@ int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, hwaddr arm_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) { ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; hwaddr phys_addr; target_ulong page_size; int prot; int ret; - ret = get_phys_addr(&cpu->env, addr, 0, 0, &phys_addr, &prot, &page_size); + ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env), &phys_addr, + &prot, &page_size); if (ret != 0) { return -1; @@ -6242,7 +6526,7 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp) } else { return float64_set_sign(float64_maxnorm, float64_is_neg(f64)); } - } else if (f64_exp >= 1023 && fpst->flush_to_zero) { + } else if (f64_exp >= 2045 && fpst->flush_to_zero) { float_raise(float_flag_underflow, fpst); return float64_set_sign(float64_zero, float64_is_neg(f64)); } diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index ba16821737..033babf551 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -193,9 +193,12 @@ int kvm_arch_put_registers(CPUState *cs, int level) } } + if (!write_list_to_kvmstate(cpu)) { + return EINVAL; + } + /* TODO: * FP state - * system registers */ return ret; } @@ -269,6 +272,14 @@ int kvm_arch_get_registers(CPUState *cs) } } + if (!write_kvmstate_to_list(cpu)) { + return EINVAL; + } + /* Note that it's OK to have registers which aren't in CPUState, + * so we can ignore a failure return here. 
+ */ + write_list_to_cpustate(cpu); + /* TODO: other registers */ return ret; } diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 80d23597c7..acf4b162bd 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -123,6 +123,23 @@ void a64_translate_init(void) #endif } +static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s) +{ + /* Return the mmu_idx to use for A64 "unprivileged load/store" insns: + * if EL1, access as if EL0; otherwise access at current EL + */ + switch (s->mmu_idx) { + case ARMMMUIdx_S12NSE1: + return ARMMMUIdx_S12NSE0; + case ARMMMUIdx_S1SE1: + return ARMMMUIdx_S1SE0; + case ARMMMUIdx_S2NS: + g_assert_not_reached(); + default: + return s->mmu_idx; + } +} + void aarch64_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -2107,7 +2124,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); - int memidx = is_unpriv ? 1 : get_mem_index(s); + int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); if (is_store) { do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx); @@ -10922,14 +10939,15 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, dc->bswap_code = 0; dc->condexec_mask = 0; dc->condexec_cond = 0; + dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags); + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) - dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0); + dc->user = (dc->current_el == 0); #endif dc->cpacr_fpen = ARM_TBFLAG_AA64_FPEN(tb->flags); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = cpu->cp_regs; - dc->current_el = arm_current_el(env); dc->features = env->features; /* Single step state. 
The code-generation logic here is: diff --git a/target-arm/translate.c b/target-arm/translate.c index bdfcdf169c..1c36b8b05e 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -113,6 +113,28 @@ void arm_translate_init(void) a64_translate_init(); } +static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s) +{ + /* Return the mmu_idx to use for A32/T32 "unprivileged load/store" + * insns: + * if PL2, UNPREDICTABLE (we choose to implement as if PL0) + * otherwise, access as if at PL0. + */ + switch (s->mmu_idx) { + case ARMMMUIdx_S1E2: /* this one is UNPREDICTABLE */ + case ARMMMUIdx_S12NSE0: + case ARMMMUIdx_S12NSE1: + return ARMMMUIdx_S12NSE0; + case ARMMMUIdx_S1E3: + case ARMMMUIdx_S1SE0: + case ARMMMUIdx_S1SE1: + return ARMMMUIdx_S1SE0; + case ARMMMUIdx_S2NS: + default: + g_assert_not_reached(); + } +} + static inline TCGv_i32 load_cpu_offset(int offset) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -8739,6 +8761,10 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) ARCH(6T2); shift = (insn >> 7) & 0x1f; i = (insn >> 16) & 0x1f; + if (i < shift) { + /* UNPREDICTABLE; we choose to UNDEF */ + goto illegal_op; + } i = i + 1 - shift; if (rm == 15) { tmp = tcg_temp_new_i32(); @@ -8793,7 +8819,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) tmp2 = load_reg(s, rn); if ((insn & 0x01200000) == 0x00200000) { /* ldrt/strt */ - i = MMU_USER_IDX; + i = get_a32_user_mem_index(s); } else { i = get_mem_index(s); } @@ -10173,7 +10199,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw break; case 0xe: /* User privilege. */ tcg_gen_addi_i32(addr, addr, imm); - memidx = MMU_USER_IDX; + memidx = get_a32_user_mem_index(s); break; case 0x9: /* Post-decrement. 
*/ imm = -imm; @@ -11032,8 +11058,10 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu, dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags); dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1; dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4; + dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags); + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) - dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0); + dc->user = (dc->current_el == 0); #endif dc->ns = ARM_TBFLAG_NS(tb->flags); dc->cpacr_fpen = ARM_TBFLAG_CPACR_FPEN(tb->flags); @@ -11042,7 +11070,6 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu, dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags); dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags); dc->cp_regs = cpu->cp_regs; - dc->current_el = arm_current_el(env); dc->features = env->features; /* Single step state. The code-generation logic here is: diff --git a/target-arm/translate.h b/target-arm/translate.h index f6ee7892ba..a1eb5b5347 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -20,6 +20,7 @@ typedef struct DisasContext { #if !defined(CONFIG_USER_ONLY) int user; #endif + ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ bool ns; /* Use non-secure CPREG bank on access */ bool cpacr_fpen; /* FP enabled via CPACR.FPEN */ bool vfp_enabled; /* FP enabled via FPSCR.EN */ @@ -69,7 +70,7 @@ static inline int arm_dc_feature(DisasContext *dc, int feature) static inline int get_mem_index(DisasContext *s) { - return s->current_el; + return s->mmu_idx; } /* target-specific extra values for is_jmp */ diff --git a/target-s390x/cc_helper.c b/target-s390x/cc_helper.c index 373eb176a1..00bc883a8a 100644 --- a/target-s390x/cc_helper.c +++ b/target-s390x/cc_helper.c @@ -179,16 +179,11 @@ static uint32_t cc_calc_subu_64(uint64_t a1, uint64_t a2, uint64_t ar) static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar) { - /* We had borrow-in if normal subtraction isn't 
equal. */ - int borrow_in = ar - (a1 - a2); int borrow_out; - /* If a2 was ULONG_MAX, and borrow_in, then a2 is logically 65 bits, - and we must have had borrow out. */ - if (borrow_in && a2 == (uint64_t)-1) { - borrow_out = 1; + if (ar != a1 - a2) { /* difference means borrow-in */ + borrow_out = (a2 >= a1); } else { - a2 += borrow_in; borrow_out = (a2 > a1); } @@ -285,16 +280,11 @@ static uint32_t cc_calc_subu_32(uint32_t a1, uint32_t a2, uint32_t ar) static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar) { - /* We had borrow-in if normal subtraction isn't equal. */ - int borrow_in = ar - (a1 - a2); int borrow_out; - /* If a2 was UINT_MAX, and borrow_in, then a2 is logically 65 bits, - and we must have had borrow out. */ - if (borrow_in && a2 == (uint32_t)-1) { - borrow_out = 1; + if (ar != a1 - a2) { /* difference means borrow-in */ + borrow_out = (a2 >= a1); } else { - a2 += borrow_in; borrow_out = (a2 > a1); } diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index c123b6f023..2e2554c4b3 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -133,7 +133,9 @@ typedef struct CPUS390XState { /* reset does memset(0) up to here */ - int cpu_num; + uint32_t cpu_num; + uint32_t machine_type; + uint8_t *storage_keys; uint64_t tod_offset; diff --git a/target-s390x/helper.h b/target-s390x/helper.h index faebfd96aa..8d2c8596bb 100644 --- a/target-s390x/helper.h +++ b/target-s390x/helper.h @@ -111,5 +111,8 @@ DEF_HELPER_FLAGS_2(sacf, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_3(ipte, TCG_CALL_NO_RWG, void, env, i64, i64) DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env) DEF_HELPER_2(lra, i64, env, i64) +DEF_HELPER_FLAGS_2(lura, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(lurag, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_3(stura, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(sturg, TCG_CALL_NO_WG, void, env, i64, i64) #endif diff --git a/target-s390x/insn-data.def b/target-s390x/insn-data.def index 
4d2feb6977..8d8e47e0bf 100644 --- a/target-s390x/insn-data.def +++ b/target-s390x/insn-data.def @@ -285,8 +285,12 @@ /* EXTRACT ACCESS */ C(0xb24f, EAR, RRE, Z, 0, 0, new, r1_32, ear, 0) +/* EXTRACT CPU ATTRIBUTE */ + C(0xeb4c, ECAG, RSY_a, GIE, 0, a2, r1, 0, ecag, 0) /* EXTRACT FPC */ C(0xb38c, EFPC, RRE, Z, 0, 0, new, r1_32, efpc, 0) +/* EXTRACT PSW */ + C(0xb98d, EPSW, RRE, Z, 0, 0, 0, 0, epsw, 0) /* FIND LEFTMOST ONE */ C(0xb983, FLOGR, RRE, EI, 0, r2_o, r1_P, 0, flogr, 0) @@ -566,6 +570,10 @@ /* SET ACCESS */ C(0xb24e, SAR, RRE, Z, 0, r2_o, 0, 0, sar, 0) +/* SET ADDRESSING MODE */ + D(0x010c, SAM24, E, Z, 0, 0, 0, 0, sam, 0, 0) + D(0x010d, SAM31, E, Z, 0, 0, 0, 0, sam, 0, 1) + D(0x010e, SAM64, E, Z, 0, 0, 0, 0, sam, 0, 3) /* SET FPC */ C(0xb384, SFPC, RRE, Z, 0, r1_o, 0, 0, sfpc, 0) /* SET FPC AND SIGNAL */ @@ -733,6 +741,9 @@ C(0xb100, LRA, RX_a, Z, 0, a2, r1, 0, lra, 0) C(0xe313, LRAY, RXY_a, LD, 0, a2, r1, 0, lra, 0) C(0xe303, LRAG, RXY_a, Z, 0, a2, r1, 0, lra, 0) +/* LOAD USING REAL ADDRESS */ + C(0xb24b, LURA, RRE, Z, 0, r2, new, r1_32, lura, 0) + C(0xb905, LURAG, RRE, Z, 0, r2, r1, 0, lurag, 0) /* MOVE TO PRIMARY */ C(0xda00, MVCP, SS_d, Z, la1, a2, 0, 0, mvcp, 0) /* MOVE TO SECONDARY */ @@ -743,10 +754,6 @@ C(0xb22a, RRBE, RRE, Z, 0, r2_o, 0, 0, rrbe, 0) /* SERVICE CALL LOGICAL PROCESSOR (PV hypercall) */ C(0xb220, SERVC, RRE, Z, r1_o, r2_o, 0, 0, servc, 0) -/* SET ADDRESSING MODE */ - D(0x010c, SAM24, E, Z, 0, 0, 0, 0, sam, 0, 0) - D(0x010d, SAM31, E, Z, 0, 0, 0, 0, sam, 0, 1) - D(0x010e, SAM64, E, Z, 0, 0, 0, 0, sam, 0, 3) /* SET ADDRESS SPACE CONTROL FAST */ C(0xb279, SACF, S, Z, 0, a2, 0, 0, sacf, 0) /* SET CLOCK */ @@ -794,6 +801,7 @@ C(0xad00, STOSM, SI, Z, la1, 0, 0, 0, stnosm, 0) /* STORE USING REAL ADDRESS */ C(0xb246, STURA, RRE, Z, r1_o, r2_o, 0, 0, stura, 0) + C(0xb925, STURG, RRE, Z, r1_o, r2_o, 0, 0, sturg, 0) /* TEST PROTECTION */ C(0xe501, TPROT, SSE, Z, la1, a2, 0, 0, tprot, 0) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 
dcd75055c1..6f2d5b4924 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -1046,7 +1046,7 @@ static void kvm_handle_diag_308(S390CPU *cpu, struct kvm_run *run) uint64_t r1, r3; cpu_synchronize_state(CPU(cpu)); - r1 = (run->s390_sieic.ipa & 0x00f0) >> 8; + r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; r3 = run->s390_sieic.ipa & 0x000f; handle_diag_308(&cpu->env, r1, r3); } @@ -1091,7 +1091,7 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) break; default: DPRINTF("KVM: unknown DIAG: 0x%x\n", func_code); - r = -1; + enter_pgmcheck(cpu, PGM_SPECIFICATION); break; } diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c index 5a55de86a1..d67b345ad1 100644 --- a/target-s390x/mem_helper.c +++ b/target-s390x/mem_helper.c @@ -490,10 +490,18 @@ uint32_t HELPER(ex)(CPUS390XState *env, uint32_t cc, uint64_t v1, helper_mvc(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); break; + case 0x400: + cc = helper_nc(env, l, get_address(env, 0, b1, d1), + get_address(env, 0, b2, d2)); + break; case 0x500: cc = helper_clc(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); break; + case 0x600: + cc = helper_oc(env, l, get_address(env, 0, b1, d1), + get_address(env, 0, b2, d2)); + break; case 0x700: cc = helper_xc(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); @@ -1034,12 +1042,34 @@ void HELPER(ptlb)(CPUS390XState *env) tlb_flush(CPU(cpu), 1); } +/* load using real address */ +uint64_t HELPER(lura)(CPUS390XState *env, uint64_t addr) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + + return (uint32_t)ldl_phys(cs->as, get_address(env, 0, 0, addr)); +} + +uint64_t HELPER(lurag)(CPUS390XState *env, uint64_t addr) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + + return ldq_phys(cs->as, get_address(env, 0, 0, addr)); +} + /* store using real address */ void HELPER(stura)(CPUS390XState *env, uint64_t addr, uint64_t v1) { CPUState *cs = CPU(s390_env_get_cpu(env)); - stw_phys(cs->as, 
get_address(env, 0, 0, addr), (uint32_t)v1); + stl_phys(cs->as, get_address(env, 0, 0, addr), (uint32_t)v1); +} + +void HELPER(sturg)(CPUS390XState *env, uint64_t addr, uint64_t v1) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + + stq_phys(cs->as, get_address(env, 0, 0, addr), v1); } /* load real address */ diff --git a/target-s390x/translate.c b/target-s390x/translate.c index ab01bc004e..8b36eca718 100644 --- a/target-s390x/translate.c +++ b/target-s390x/translate.c @@ -317,12 +317,14 @@ static inline void gen_illegal_opcode(DisasContext *s) gen_program_exception(s, PGM_SPECIFICATION); } -static inline void check_privileged(DisasContext *s) +#ifndef CONFIG_USER_ONLY +static void check_privileged(DisasContext *s) { if (s->tb->flags & (PSW_MASK_PSTATE >> 32)) { gen_program_exception(s, PGM_PRIVILEGED); } } +#endif static TCGv_i64 get_address(DisasContext *s, int x2, int b2, int d2) { @@ -2045,12 +2047,37 @@ static ExitStatus op_ear(DisasContext *s, DisasOps *o) return NO_EXIT; } +static ExitStatus op_ecag(DisasContext *s, DisasOps *o) +{ + /* No cache information provided. */ + tcg_gen_movi_i64(o->out, -1); + return NO_EXIT; +} + static ExitStatus op_efpc(DisasContext *s, DisasOps *o) { tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, fpc)); return NO_EXIT; } +static ExitStatus op_epsw(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s->fields, r1); + int r2 = get_field(s->fields, r2); + TCGv_i64 t = tcg_temp_new_i64(); + + /* Note the "subsequently" in the PoO, which implies a defined result + if r1 == r2. Thus we cannot defer these writes to an output hook. */ + tcg_gen_shri_i64(t, psw_mask, 32); + store_reg32_i64(r1, t); + if (r2 != 0) { + store_reg32_i64(r2, psw_mask); + } + + tcg_temp_free_i64(t); + return NO_EXIT; +} + static ExitStatus op_ex(DisasContext *s, DisasOps *o) { /* ??? 
Perhaps a better way to implement EXECUTE is to set a bit in @@ -2460,6 +2487,24 @@ static ExitStatus op_lm64(DisasContext *s, DisasOps *o) return NO_EXIT; } +#ifndef CONFIG_USER_ONLY +static ExitStatus op_lura(DisasContext *s, DisasOps *o) +{ + check_privileged(s); + potential_page_fault(s); + gen_helper_lura(o->out, cpu_env, o->in2); + return NO_EXIT; +} + +static ExitStatus op_lurag(DisasContext *s, DisasOps *o) +{ + check_privileged(s); + potential_page_fault(s); + gen_helper_lurag(o->out, cpu_env, o->in2); + return NO_EXIT; +} +#endif + static ExitStatus op_mov2(DisasContext *s, DisasOps *o) { o->out = o->in2; @@ -2925,19 +2970,42 @@ static ExitStatus op_sacf(DisasContext *s, DisasOps *o) /* Addressing mode has changed, so end the block. */ return EXIT_PC_STALE; } +#endif static ExitStatus op_sam(DisasContext *s, DisasOps *o) { int sam = s->insn->data; - TCGv_i64 tsam = tcg_const_i64(sam); + TCGv_i64 tsam; + uint64_t mask; - /* Overwrite PSW_MASK_64 and PSW_MASK_32 */ - tcg_gen_deposit_i64(psw_mask, psw_mask, tsam, 31, 2); + switch (sam) { + case 0: + mask = 0xffffff; + break; + case 1: + mask = 0x7fffffff; + break; + default: + mask = -1; + break; + } + /* Bizzare but true, we check the address of the current insn for the + specification exception, not the next to be executed. Thus the PoO + documents that Bad Things Happen two bytes before the end. */ + if (s->pc & ~mask) { + gen_program_exception(s, PGM_SPECIFICATION); + return EXIT_NORETURN; + } + s->next_pc &= mask; + + tsam = tcg_const_i64(sam); + tcg_gen_deposit_i64(psw_mask, psw_mask, tsam, 31, 2); tcg_temp_free_i64(tsam); + + /* Always exit the TB, since we (may have) changed execution mode. 
*/ return EXIT_PC_STALE; } -#endif static ExitStatus op_sar(DisasContext *s, DisasOps *o) { @@ -3221,8 +3289,14 @@ static ExitStatus op_stctl(DisasContext *s, DisasOps *o) static ExitStatus op_stidp(DisasContext *s, DisasOps *o) { + TCGv_i64 t1 = tcg_temp_new_i64(); + check_privileged(s); tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, cpu_num)); + tcg_gen_ld32u_i64(t1, cpu_env, offsetof(CPUS390XState, machine_type)); + tcg_gen_deposit_i64(o->out, o->out, t1, 32, 32); + tcg_temp_free_i64(t1); + return NO_EXIT; } @@ -3317,6 +3391,14 @@ static ExitStatus op_stura(DisasContext *s, DisasOps *o) gen_helper_stura(cpu_env, o->in2, o->in1); return NO_EXIT; } + +static ExitStatus op_sturg(DisasContext *s, DisasOps *o) +{ + check_privileged(s); + potential_page_fault(s); + gen_helper_sturg(cpu_env, o->in2, o->in1); + return NO_EXIT; +} #endif static ExitStatus op_st8(DisasContext *s, DisasOps *o) diff --git a/tests/Makefile b/tests/Makefile index c2e2e52f22..5caccf765a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -60,6 +60,8 @@ gcov-files-test-mul64-y = util/host-utils.c check-unit-y += tests/test-int128$(EXESUF) # all code tested by test-int128 is inside int128.h gcov-files-test-int128-y = +check-unit-y += tests/rcutorture$(EXESUF) +gcov-files-rcutorture-y = util/rcu.c check-unit-y += tests/test-bitops$(EXESUF) check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF) check-unit-y += tests/check-qom-interface$(EXESUF) @@ -223,7 +225,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \ tests/test-qmp-commands.o tests/test-visitor-serialization.o \ tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \ - tests/test-opts-visitor.o tests/test-qmp-event.o + tests/test-opts-visitor.o tests/test-qmp-event.o \ + tests/rcutorture.o test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \ tests/test-qapi-event.o @@ -252,6 
+255,8 @@ tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o page_cache.o libqemuutil.a tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o tests/test-int128$(EXESUF): tests/test-int128.o +tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a + tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ hw/core/irq.o \ @@ -261,7 +266,8 @@ tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ libqemuutil.a libqemustub.a tests/test-vmstate$(EXESUF): tests/test-vmstate.o \ migration/vmstate.o migration/qemu-file.o migration/qemu-file-buf.o \ - migration/qemu-file-unix.o \ + migration/qemu-file-unix.o qjson.o \ + $(qom-core-obj) \ libqemuutil.a libqemustub.a tests/test-qapi-types.c tests/test-qapi-types.h :\ diff --git a/tests/rcutorture.c b/tests/rcutorture.c new file mode 100644 index 0000000000..60a2ccfe2e --- /dev/null +++ b/tests/rcutorture.c @@ -0,0 +1,451 @@ +/* + * rcutorture.c: simple user-level performance/stress test of RCU. + * + * Usage: + * ./rcu <nreaders> rperf [ <seconds> ] + * Run a read-side performance test with the specified + * number of readers for <seconds> seconds. + * ./rcu <nupdaters> uperf [ <seconds> ] + * Run an update-side performance test with the specified + * number of updaters and specified duration. + * ./rcu <nreaders> perf [ <seconds> ] + * Run a combined read/update performance test with the specified + * number of readers and one updater and specified duration. + * + * The above tests produce output as follows: + * + * n_reads: 46008000 n_updates: 146026 nreaders: 2 nupdaters: 1 duration: 1 + * ns/read: 43.4707 ns/update: 6848.1 + * + * The first line lists the total number of RCU reads and updates executed + * during the test, the number of reader threads, the number of updater + * threads, and the duration of the test in seconds. 
The second line + * lists the average duration of each type of operation in nanoseconds, + * or "nan" if the corresponding type of operation was not performed. + * + * ./rcu <nreaders> stress [ <seconds> ] + * Run a stress test with the specified number of readers and + * one updater. + * + * This test produces output as follows: + * + * n_reads: 114633217 n_updates: 3903415 n_mberror: 0 + * rcu_stress_count: 114618391 14826 0 0 0 0 0 0 0 0 0 + * + * The first line lists the number of RCU read and update operations + * executed, followed by the number of memory-ordering violations + * (which will be zero in a correct RCU implementation). The second + * line lists the number of readers observing progressively more stale + * data. A correct RCU implementation will have all but the first two + * numbers zero. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (c) 2008 Paul E. McKenney, IBM Corporation. + */ + +/* + * Test variables. 
+ */ + +#include <glib.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "qemu/atomic.h" +#include "qemu/rcu.h" +#include "qemu/compiler.h" +#include "qemu/thread.h" + +long long n_reads = 0LL; +long n_updates = 0L; +int nthreadsrunning; + +#define GOFLAG_INIT 0 +#define GOFLAG_RUN 1 +#define GOFLAG_STOP 2 + +static volatile int goflag = GOFLAG_INIT; + +#define RCU_READ_RUN 1000 + +#define NR_THREADS 100 +static QemuThread threads[NR_THREADS]; +static struct rcu_reader_data *data[NR_THREADS]; +static int n_threads; + +static void create_thread(void *(*func)(void *)) +{ + if (n_threads >= NR_THREADS) { + fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS); + exit(-1); + } + qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads], + QEMU_THREAD_JOINABLE); + n_threads++; +} + +static void wait_all_threads(void) +{ + int i; + + for (i = 0; i < n_threads; i++) { + qemu_thread_join(&threads[i]); + } + n_threads = 0; +} + +/* + * Performance test. 
+ */ + +static void *rcu_read_perf_test(void *arg) +{ + int i; + long long n_reads_local = 0; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + for (i = 0; i < RCU_READ_RUN; i++) { + rcu_read_lock(); + rcu_read_unlock(); + } + n_reads_local += RCU_READ_RUN; + } + atomic_add(&n_reads, n_reads_local); + + rcu_unregister_thread(); + return NULL; +} + +static void *rcu_update_perf_test(void *arg) +{ + long long n_updates_local = 0; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + synchronize_rcu(); + n_updates_local++; + } + atomic_add(&n_updates, n_updates_local); + + rcu_unregister_thread(); + return NULL; +} + +static void perftestinit(void) +{ + nthreadsrunning = 0; +} + +static void perftestrun(int nthreads, int duration, int nreaders, int nupdaters) +{ + while (atomic_read(&nthreadsrunning) < nthreads) { + g_usleep(1000); + } + goflag = GOFLAG_RUN; + g_usleep(duration * G_USEC_PER_SEC); + goflag = GOFLAG_STOP; + wait_all_threads(); + printf("n_reads: %lld n_updates: %ld nreaders: %d nupdaters: %d duration: %d\n", + n_reads, n_updates, nreaders, nupdaters, duration); + printf("ns/read: %g ns/update: %g\n", + ((duration * 1000*1000*1000.*(double)nreaders) / + (double)n_reads), + ((duration * 1000*1000*1000.*(double)nupdaters) / + (double)n_updates)); + exit(0); +} + +static void perftest(int nreaders, int duration) +{ + int i; + + perftestinit(); + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_perf_test); + } + create_thread(rcu_update_perf_test); + perftestrun(i + 1, duration, nreaders, 1); +} + +static void rperftest(int nreaders, int duration) +{ + int i; + + perftestinit(); + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_perf_test); + } + 
perftestrun(i, duration, nreaders, 0); +} + +static void uperftest(int nupdaters, int duration) +{ + int i; + + perftestinit(); + for (i = 0; i < nupdaters; i++) { + create_thread(rcu_update_perf_test); + } + perftestrun(i, duration, 0, nupdaters); +} + +/* + * Stress test. + */ + +#define RCU_STRESS_PIPE_LEN 10 + +struct rcu_stress { + int pipe_count; + int mbtest; +}; + +struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0 } }; +struct rcu_stress *rcu_stress_current; +int rcu_stress_idx; + +int n_mberror; +long long rcu_stress_count[RCU_STRESS_PIPE_LEN + 1]; + + +static void *rcu_read_stress_test(void *arg) +{ + int i; + int itercnt = 0; + struct rcu_stress *p; + int pc; + long long n_reads_local = 0; + volatile int garbage = 0; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + rcu_read_lock(); + p = atomic_rcu_read(&rcu_stress_current); + if (p->mbtest == 0) { + n_mberror++; + } + rcu_read_lock(); + for (i = 0; i < 100; i++) { + garbage++; + } + rcu_read_unlock(); + pc = p->pipe_count; + rcu_read_unlock(); + if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0)) { + pc = RCU_STRESS_PIPE_LEN; + } + atomic_inc(&rcu_stress_count[pc]); + n_reads_local++; + if ((++itercnt % 0x1000) == 0) { + synchronize_rcu(); + } + } + atomic_add(&n_reads, n_reads_local); + + rcu_unregister_thread(); + return NULL; +} + +static void *rcu_update_stress_test(void *arg) +{ + int i; + struct rcu_stress *p; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + i = rcu_stress_idx + 1; + if (i >= RCU_STRESS_PIPE_LEN) { + i = 0; + } + p = &rcu_stress_array[i]; + p->mbtest = 0; + smp_mb(); + p->pipe_count = 0; + p->mbtest = 1; + atomic_rcu_set(&rcu_stress_current, p); + rcu_stress_idx = i; + for (i = 0; i < RCU_STRESS_PIPE_LEN; i++) { + if (i != rcu_stress_idx) 
{ + rcu_stress_array[i].pipe_count++; + } + } + synchronize_rcu(); + n_updates++; + } + + rcu_unregister_thread(); + return NULL; +} + +static void *rcu_fake_update_stress_test(void *arg) +{ + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + synchronize_rcu(); + g_usleep(1000); + } + + rcu_unregister_thread(); + return NULL; +} + +static void stresstest(int nreaders, int duration) +{ + int i; + + rcu_stress_current = &rcu_stress_array[0]; + rcu_stress_current->pipe_count = 0; + rcu_stress_current->mbtest = 1; + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_stress_test); + } + create_thread(rcu_update_stress_test); + for (i = 0; i < 5; i++) { + create_thread(rcu_fake_update_stress_test); + } + goflag = GOFLAG_RUN; + g_usleep(duration * G_USEC_PER_SEC); + goflag = GOFLAG_STOP; + wait_all_threads(); + printf("n_reads: %lld n_updates: %ld n_mberror: %d\n", + n_reads, n_updates, n_mberror); + printf("rcu_stress_count:"); + for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++) { + printf(" %lld", rcu_stress_count[i]); + } + printf("\n"); + exit(0); +} + +/* GTest interface */ + +static void gtest_stress(int nreaders, int duration) +{ + int i; + + rcu_stress_current = &rcu_stress_array[0]; + rcu_stress_current->pipe_count = 0; + rcu_stress_current->mbtest = 1; + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_stress_test); + } + create_thread(rcu_update_stress_test); + for (i = 0; i < 5; i++) { + create_thread(rcu_fake_update_stress_test); + } + goflag = GOFLAG_RUN; + g_usleep(duration * G_USEC_PER_SEC); + goflag = GOFLAG_STOP; + wait_all_threads(); + g_assert_cmpint(n_mberror, ==, 0); + for (i = 2; i <= RCU_STRESS_PIPE_LEN; i++) { + g_assert_cmpint(rcu_stress_count[i], ==, 0); + } +} + +static void gtest_stress_1_1(void) +{ + gtest_stress(1, 1); +} + +static void gtest_stress_10_1(void) +{ + gtest_stress(10, 1); +} + +static void 
gtest_stress_1_5(void) +{ + gtest_stress(1, 5); +} + +static void gtest_stress_10_5(void) +{ + gtest_stress(10, 5); +} + +/* + * Mainprogram. + */ + +static void usage(int argc, char *argv[]) +{ + fprintf(stderr, "Usage: %s [nreaders [ perf | stress ] ]\n", argv[0]); + exit(-1); +} + +int main(int argc, char *argv[]) +{ + int nreaders = 1; + int duration = 1; + + if (argc >= 2 && argv[1][0] == '-') { + g_test_init(&argc, &argv, NULL); + if (g_test_quick()) { + g_test_add_func("/rcu/torture/1reader", gtest_stress_1_1); + g_test_add_func("/rcu/torture/10readers", gtest_stress_10_1); + } else { + g_test_add_func("/rcu/torture/1reader", gtest_stress_1_5); + g_test_add_func("/rcu/torture/10readers", gtest_stress_10_5); + } + return g_test_run(); + } + + if (argc >= 2) { + nreaders = strtoul(argv[1], NULL, 0); + } + if (argc > 3) { + duration = strtoul(argv[3], NULL, 0); + } + if (argc < 3 || strcmp(argv[2], "stress") == 0) { + stresstest(nreaders, duration); + } else if (strcmp(argv[2], "rperf") == 0) { + rperftest(nreaders, duration); + } else if (strcmp(argv[2], "uperf") == 0) { + uperftest(nreaders, duration); + } else if (strcmp(argv[2], "perf") == 0) { + perftest(nreaders, duration); + } + usage(argc, argv); + return 0; +} diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 39b7b01734..1d620e04fb 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -85,7 +85,7 @@ static void save_vmstate(const VMStateDescription *desc, void *obj) QEMUFile *f = open_test_file(true); /* Save file with vmstate */ - vmstate_save_state(f, desc, obj); + vmstate_save_state(f, desc, obj, NULL); qemu_put_byte(f, QEMU_VM_EOF); g_assert(!qemu_file_get_error(f)); qemu_fclose(f); @@ -394,7 +394,7 @@ static void test_save_noskip(void) QEMUFile *fsave = qemu_bufopen("w", NULL); TestStruct obj = { .a = 1, .b = 2, .c = 3, .d = 4, .e = 5, .f = 6, .skip_c_e = false }; - vmstate_save_state(fsave, &vmstate_skipping, &obj); + vmstate_save_state(fsave, &vmstate_skipping, &obj, 
NULL); g_assert(!qemu_file_get_error(fsave)); uint8_t expected[] = { @@ -414,7 +414,7 @@ static void test_save_skip(void) QEMUFile *fsave = qemu_bufopen("w", NULL); TestStruct obj = { .a = 1, .b = 2, .c = 3, .d = 4, .e = 5, .f = 6, .skip_c_e = true }; - vmstate_save_state(fsave, &vmstate_skipping, &obj); + vmstate_save_state(fsave, &vmstate_skipping, &obj, NULL); g_assert(!qemu_file_get_error(fsave)); uint8_t expected[] = { diff --git a/trace-events b/trace-events index 04f5df2526..907da7ed8a 100644 --- a/trace-events +++ b/trace-events @@ -1142,8 +1142,11 @@ vmware_scratch_write(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp) "%dx%d @ %d bpp" # savevm.c +qemu_loadvm_state_section(unsigned int section_type) "%d" +qemu_loadvm_state_section_partend(uint32_t section_id) "%u" +qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u" -savevm_section_end(const char *id, unsigned int section_id) "%s, section_id %u" +savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d" savevm_state_begin(void) "" savevm_state_iterate(void) "" savevm_state_complete(void) "" @@ -1154,6 +1157,12 @@ qemu_announce_self_iter(const char *mac) "%s" # vmstate.c vmstate_load_field_error(const char *field, int ret) "field \"%s\" load failed, ret = %d" +vmstate_load_state(const char *name, int version_id) "%s v%d" +vmstate_load_state_end(const char *name, const char *reason, int val) "%s %s/%d" +vmstate_load_state_field(const char *name, const char *field) "%s:%s" +vmstate_subsection_load(const char *parent) "%s" +vmstate_subsection_load_bad(const char *parent, const char *sub) "%s: %s" +vmstate_subsection_load_good(const char *parent) "%s" # qemu-file.c qemu_file_fclose(void) "" @@ -1326,6 +1335,68 @@ migrate_fd_cancel(void) "" 
migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64 migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64 +# migration/rdma.c +__qemu_rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" +__qemu_rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" +qemu_dma_accept_incoming_migration(void) "" +qemu_dma_accept_incoming_migration_accepted(void) "" +qemu_rdma_accept_pin_state(bool pin) "%d" +qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p" +qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")" +qemu_rdma_block_for_wrid_miss_b(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "B Wanted wrid %s (%d) but got %s (%" PRIu64 ")" +qemu_rdma_cleanup_disconnect(void) "" +qemu_rdma_cleanup_waiting_for_disconnect(void) "" +qemu_rdma_close(void) "" +qemu_rdma_connect_pin_all_requested(void) "" +qemu_rdma_connect_pin_all_outcome(bool pin) "%d" +qemu_rdma_dest_init_trying(const char *host, const char *ip) "%s => %s" +qemu_rdma_dump_gid(const char *who, const char *src, const char *dst) "%s Source GID: %s, Dest GID: %s" +qemu_rdma_exchange_get_response_start(const char *desc) "CONTROL: %s receiving..." 
+qemu_rdma_exchange_get_response_none(const char *desc, int type) "Surprise: got %s (%d)" +qemu_rdma_exchange_send_issue_callback(void) "" +qemu_rdma_exchange_send_waiting(const char *desc) "Waiting for response %s" +qemu_rdma_exchange_send_received(const char *desc) "Response %s received." +qemu_rdma_fill(int64_t control_len, int size) "RDMA %" PRId64 " of %d bytes already in buffer" +qemu_rdma_init_ram_blocks(int blocks) "Allocated %d local ram block structures" +qemu_rdma_poll_recv(const char *compstr, int64_t comp, int64_t id, int sent) "completion %s #%" PRId64 " received (%" PRId64 ") left %d" +qemu_rdma_poll_write(const char *compstr, int64_t comp, int left, uint64_t block, uint64_t chunk, void *local, void *remote) "completions %s (%" PRId64 ") left %d, block %" PRIu64 ", chunk: %" PRIu64 " %p %p" +qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other completion %s (%" PRId64 ") received left %d" +qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.." 
+qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu64 " bytes @ %p" +qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64 +qemu_rdma_registration_handle_finished(void) "" +qemu_rdma_registration_handle_ram_blocks(void) "" +qemu_rdma_registration_handle_register(int requests) "%d requests" +qemu_rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64 +qemu_rdma_registration_handle_register_rkey(int rkey) "%x" +qemu_rdma_registration_handle_unregister(int requests) "%d requests" +qemu_rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64 +qemu_rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64 +qemu_rdma_registration_handle_wait(uint64_t flags) "Waiting for next request %" PRIu64 +qemu_rdma_registration_start(uint64_t flags) "%" PRIu64 +qemu_rdma_registration_stop(uint64_t flags) "%" PRIu64 +qemu_rdma_registration_stop_ram(void) "" +qemu_rdma_resolve_host_trying(const char *host, const char *ip) "Trying %s => %s" +qemu_rdma_signal_unregister_append(uint64_t chunk, int pos) "Appending unregister chunk %" PRIu64 " at position %d" +qemu_rdma_signal_unregister_already(uint64_t chunk) "Unregister chunk %" PRIu64 " already in queue" +qemu_rdma_unregister_waiting_inflight(uint64_t chunk) "Cannot unregister inflight chunk: %" PRIu64 +qemu_rdma_unregister_waiting_proc(uint64_t chunk, int pos) "Processing unregister for chunk: %" PRIu64 " at position %d" +qemu_rdma_unregister_waiting_send(uint64_t chunk) "Sending unregister for chunk: %" PRIu64 +qemu_rdma_unregister_waiting_complete(uint64_t chunk) "Unregister for chunk: %" PRIu64 " complete." 
+qemu_rdma_write_flush(int sent) "sent total: %d" +qemu_rdma_write_one_block(int count, int block, uint64_t chunk, uint64_t current, uint64_t len, int nb_sent, int nb_chunks) "(%d) Not clobbering: block: %d chunk %" PRIu64 " current %" PRIu64 " len %" PRIu64 " %d %d" +qemu_rdma_write_one_post(uint64_t chunk, long addr, long remote, uint32_t len) "Posting chunk: %" PRIu64 ", addr: %lx remote: %lx, bytes %" PRIu32 +qemu_rdma_write_one_queue_full(void) "" +qemu_rdma_write_one_recvregres(int mykey, int theirkey, uint64_t chunk) "Received registration result: my key: %x their key %x, chunk %" PRIu64 +qemu_rdma_write_one_sendreg(uint64_t chunk, int len, int index, int64_t offset) "Sending registration request chunk %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64 +qemu_rdma_write_one_top(uint64_t chunks, uint64_t size) "Writing %" PRIu64 " chunks, (%" PRIu64 " MB)" +qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "Entire chunk is zero, sending compress: %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64 +rdma_start_incoming_migration(void) "" +rdma_start_incoming_migration_after_dest_init(void) "" +rdma_start_incoming_migration_after_rdma_listen(void) "" +rdma_start_outgoing_migration_after_rdma_connect(void) "" +rdma_start_outgoing_migration_after_rdma_source_init(void) "" + # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p" diff --git a/util/Makefile.objs b/util/Makefile.objs index 93007e2f56..ceaba30939 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -17,3 +17,4 @@ util-obj-y += throttle.o util-obj-y += getauxval.o util-obj-y += readline.o util-obj-y += rfifolock.o +util-obj-y += rcu.o diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 41cb23df0c..50a29d8f7a 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -307,11 +307,13 @@ static inline void futex_wait(QemuEvent *ev, unsigned val) #else static inline void 
futex_wake(QemuEvent *ev, int n) { + pthread_mutex_lock(&ev->lock); if (n == 1) { pthread_cond_signal(&ev->cond); } else { pthread_cond_broadcast(&ev->cond); } + pthread_mutex_unlock(&ev->lock); } static inline void futex_wait(QemuEvent *ev, unsigned val) diff --git a/util/rcu.c b/util/rcu.c new file mode 100644 index 0000000000..c9c3e6e4ab --- /dev/null +++ b/util/rcu.c @@ -0,0 +1,291 @@ +/* + * urcu-mb.c + * + * Userspace RCU library with explicit memory barriers + * + * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * Copyright 2015 Red Hat, Inc. + * + * Ported to QEMU by Paolo Bonzini <pbonzini@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include "qemu-common.h" +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <stdint.h> +#include <errno.h> +#include "qemu/rcu.h" +#include "qemu/atomic.h" +#include "qemu/thread.h" + +/* + * Global grace period counter. Bit 0 is always one in rcu_gp_ctr. + * Bits 1 and above are defined in synchronize_rcu. 
+ */ +#define RCU_GP_LOCKED (1UL << 0) +#define RCU_GP_CTR (1UL << 1) + +unsigned long rcu_gp_ctr = RCU_GP_LOCKED; + +QemuEvent rcu_gp_event; +static QemuMutex rcu_gp_lock; + +/* + * Check whether a quiescent state was crossed between the beginning of + * update_counter_and_wait and now. + */ +static inline int rcu_gp_ongoing(unsigned long *ctr) +{ + unsigned long v; + + v = atomic_read(ctr); + return v && (v != rcu_gp_ctr); +} + +/* Written to only by each individual reader. Read by both the reader and the + * writers. + */ +__thread struct rcu_reader_data rcu_reader; + +/* Protected by rcu_gp_lock. */ +typedef QLIST_HEAD(, rcu_reader_data) ThreadList; +static ThreadList registry = QLIST_HEAD_INITIALIZER(registry); + +/* Wait for previous parity/grace period to be empty of readers. */ +static void wait_for_readers(void) +{ + ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders); + struct rcu_reader_data *index, *tmp; + + for (;;) { + /* We want to be notified of changes made to rcu_gp_ongoing + * while we walk the list. + */ + qemu_event_reset(&rcu_gp_event); + + /* Instead of using atomic_mb_set for index->waiting, and + * atomic_mb_read for index->ctr, memory barriers are placed + * manually since writes to different threads are independent. + * atomic_mb_set has a smp_wmb before... + */ + smp_wmb(); + QLIST_FOREACH(index, &registry, node) { + atomic_set(&index->waiting, true); + } + + /* ... and a smp_mb after. */ + smp_mb(); + + QLIST_FOREACH_SAFE(index, &registry, node, tmp) { + if (!rcu_gp_ongoing(&index->ctr)) { + QLIST_REMOVE(index, node); + QLIST_INSERT_HEAD(&qsreaders, index, node); + + /* No need for mb_set here, worst of all we + * get some extra futex wakeups. + */ + atomic_set(&index->waiting, false); + } + } + + /* atomic_mb_read has smp_rmb after. */ + smp_rmb(); + + if (QLIST_EMPTY(&registry)) { + break; + } + + /* Wait for one thread to report a quiescent state and + * try again. 
+ */ + qemu_event_wait(&rcu_gp_event); + } + + /* put back the reader list in the registry */ + QLIST_SWAP(&registry, &qsreaders, node); +} + +void synchronize_rcu(void) +{ + qemu_mutex_lock(&rcu_gp_lock); + + if (!QLIST_EMPTY(&registry)) { + /* In either case, the atomic_mb_set below blocks stores that free + * old RCU-protected pointers. + */ + if (sizeof(rcu_gp_ctr) < 8) { + /* For architectures with 32-bit longs, a two-subphases algorithm + * ensures we do not encounter overflow bugs. + * + * Switch parity: 0 -> 1, 1 -> 0. + */ + atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + wait_for_readers(); + atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + } else { + /* Increment current grace period. */ + atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); + } + + wait_for_readers(); + } + + qemu_mutex_unlock(&rcu_gp_lock); +} + + +#define RCU_CALL_MIN_SIZE 30 + +/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h + * from liburcu. Note that head is only used by the consumer. + */ +static struct rcu_head dummy; +static struct rcu_head *head = &dummy, **tail = &dummy.next; +static int rcu_call_count; +static QemuEvent rcu_call_ready_event; + +static void enqueue(struct rcu_head *node) +{ + struct rcu_head **old_tail; + + node->next = NULL; + old_tail = atomic_xchg(&tail, &node->next); + atomic_mb_set(old_tail, node); +} + +static struct rcu_head *try_dequeue(void) +{ + struct rcu_head *node, *next; + +retry: + /* Test for an empty list, which we do not expect. Note that for + * the consumer head and tail are always consistent. The head + * is consistent because only the consumer reads/writes it. + * The tail, because it is the first step in the enqueuing. + * It is only the next pointers that might be inconsistent. + */ + if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) { + abort(); + } + + /* If the head node has NULL in its next pointer, the value is + * wrong and we need to wait until its enqueuer finishes the update. 
+ */ + node = head; + next = atomic_mb_read(&head->next); + if (!next) { + return NULL; + } + + /* Since we are the sole consumer, and we excluded the empty case + * above, the queue will always have at least two nodes: the + * dummy node, and the one being removed. So we do not need to update + * the tail pointer. + */ + head = next; + + /* If we dequeued the dummy node, add it back at the end and retry. */ + if (node == &dummy) { + enqueue(node); + goto retry; + } + + return node; +} + +static void *call_rcu_thread(void *opaque) +{ + struct rcu_head *node; + + for (;;) { + int tries = 0; + int n = atomic_read(&rcu_call_count); + + /* Heuristically wait for a decent number of callbacks to pile up. + * Fetch rcu_call_count now, we only must process elements that were + * added before synchronize_rcu() starts. + */ + while (n < RCU_CALL_MIN_SIZE && ++tries <= 5) { + g_usleep(100000); + qemu_event_reset(&rcu_call_ready_event); + n = atomic_read(&rcu_call_count); + if (n < RCU_CALL_MIN_SIZE) { + qemu_event_wait(&rcu_call_ready_event); + n = atomic_read(&rcu_call_count); + } + } + + atomic_sub(&rcu_call_count, n); + synchronize_rcu(); + while (n > 0) { + node = try_dequeue(); + while (!node) { + qemu_event_reset(&rcu_call_ready_event); + node = try_dequeue(); + if (!node) { + qemu_event_wait(&rcu_call_ready_event); + node = try_dequeue(); + } + } + + n--; + node->func(node); + } + } + abort(); +} + +void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node)) +{ + node->func = func; + enqueue(node); + atomic_inc(&rcu_call_count); + qemu_event_set(&rcu_call_ready_event); +} + +void rcu_register_thread(void) +{ + assert(rcu_reader.ctr == 0); + qemu_mutex_lock(&rcu_gp_lock); + QLIST_INSERT_HEAD(&registry, &rcu_reader, node); + qemu_mutex_unlock(&rcu_gp_lock); +} + +void rcu_unregister_thread(void) +{ + qemu_mutex_lock(&rcu_gp_lock); + QLIST_REMOVE(&rcu_reader, node); + qemu_mutex_unlock(&rcu_gp_lock); +} + +static void __attribute__((__constructor__)) 
rcu_init(void) +{ + QemuThread thread; + + qemu_mutex_init(&rcu_gp_lock); + qemu_event_init(&rcu_gp_event, true); + + qemu_event_init(&rcu_call_ready_event, false); + qemu_thread_create(&thread, "call_rcu", call_rcu_thread, + NULL, QEMU_THREAD_DETACHED); + + rcu_register_thread(); +} |