-rw-r--r-- | MAINTAINERS | 12
-rw-r--r-- | Makefile | 2
-rw-r--r-- | accel/tcg/cputlb.c | 626
-rw-r--r-- | accel/tcg/softmmu_template.h | 454
-rw-r--r-- | block.c | 6
-rw-r--r-- | block/io.c | 45
-rw-r--r-- | block/qcow2-refcount.c | 4
-rw-r--r-- | block/qcow2.c | 1
-rw-r--r-- | block/qcow2.h | 1
-rw-r--r-- | block/vdi.c | 15
-rw-r--r-- | block/vvfat.c | 12
-rw-r--r-- | docs/devel/index.rst | 1
-rw-r--r-- | docs/devel/secure-coding-practices.rst | 106
-rw-r--r-- | docs/security.texi | 131
-rw-r--r-- | include/block/block.h | 4
-rw-r--r-- | job.c | 2
-rw-r--r-- | linux-user/elfload.c | 20
-rw-r--r-- | linux-user/exit.c | 3
-rw-r--r-- | linux-user/ioctls.h | 2
-rw-r--r-- | linux-user/signal.c | 5
-rw-r--r-- | linux-user/syscall.c | 47
-rw-r--r-- | linux-user/uname.c | 5
-rw-r--r-- | qemu-doc.texi | 3
-rw-r--r-- | qemu-img.c | 77
-rwxr-xr-x | tests/qemu-iotests/192 | 4
-rwxr-xr-x | tests/qemu-iotests/252 | 124
-rw-r--r-- | tests/qemu-iotests/252.out | 39
-rw-r--r-- | tests/qemu-iotests/group | 1
-rw-r--r-- | tests/test-block-iothread.c | 107
-rw-r--r-- | util/aio-posix.c | 12
-rw-r--r-- | util/readline.c | 174 |
31 files changed, 1320 insertions(+), 725 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS index 6d4a10b390..a73a61a546 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1391,6 +1391,13 @@ F: include/hw/net/ F: tests/virtio-net-test.c T: git https://github.com/jasowang/qemu.git net +Parallel NOR Flash devices +M: Philippe Mathieu-Daudé <philmd@redhat.com> +T: git https://gitlab.com/philmd/qemu.git pflash-next +S: Maintained +F: hw/block/pflash_cfi*.c +F: include/hw/block/flash.h + SCSI M: Paolo Bonzini <pbonzini@redhat.com> R: Fam Zheng <fam@euphon.net> @@ -2407,12 +2414,13 @@ F: block/ssh.c CURL L: qemu-block@nongnu.org -S: Supported +S: Odd Fixes F: block/curl.c GLUSTER L: qemu-block@nongnu.org -S: Supported +L: integration@gluster.org +S: Odd Fixes F: block/gluster.c Null Block Driver @@ -976,7 +976,7 @@ qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ qemu-deprecated.texi qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ qemu-monitor-info.texi docs/qemu-block-drivers.texi \ - docs/qemu-cpu-models.texi + docs/qemu-cpu-models.texi docs/security.texi docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \ docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index f2f618217d..a083324768 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -856,9 +856,8 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) } static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, - int mmu_idx, - target_ulong addr, uintptr_t retaddr, - bool recheck, MMUAccessType access_type, int size) + int mmu_idx, target_ulong addr, uintptr_t retaddr, + MMUAccessType access_type, int size) { CPUState *cpu = ENV_GET_CPU(env); hwaddr mr_offset; @@ -868,30 +867,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (recheck) { - /* - * This is a TLB_RECHECK access, where the MMU protection - * covers a smaller range than a target page, and we must - * repeat the MMU check here. This tlb_fill() call might - * longjump out if this access should cause a guest exception. - */ - CPUTLBEntry *entry; - target_ulong tlb_addr; - - tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr); - - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = (access_type == MMU_DATA_LOAD ? - entry->addr_read : entry->addr_code); - if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { - /* RAM access */ - uintptr_t haddr = addr + entry->addend; - - return ldn_p((void *)haddr, size); - } - /* Fall through for handling IO accesses */ - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -925,9 +900,8 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, } static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, - int mmu_idx, - uint64_t val, target_ulong addr, - uintptr_t retaddr, bool recheck, int size) + int mmu_idx, uint64_t val, target_ulong addr, + uintptr_t retaddr, int size) { CPUState *cpu = ENV_GET_CPU(env); hwaddr mr_offset; @@ -936,30 +910,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (recheck) { - /* - * This is a TLB_RECHECK access, where the MMU protection - * covers a smaller range than a target page, and we must - * repeat the MMU check here. This tlb_fill() call might - * longjump out if this access should cause a guest exception. 
- */ - CPUTLBEntry *entry; - target_ulong tlb_addr; - - tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); - - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_addr_write(entry); - if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { - /* RAM access */ - uintptr_t haddr = addr + entry->addend; - - stn_p((void *)haddr, size, val); - return; - } - /* Fall through for handling IO accesses */ - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -1168,26 +1118,481 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } #ifdef TARGET_WORDS_BIGENDIAN -# define TGT_BE(X) (X) -# define TGT_LE(X) BSWAP(X) +#define NEED_BE_BSWAP 0 +#define NEED_LE_BSWAP 1 #else -# define TGT_BE(X) BSWAP(X) -# define TGT_LE(X) (X) +#define NEED_BE_BSWAP 1 +#define NEED_LE_BSWAP 0 #endif -#define MMUSUFFIX _mmu +/* + * Byte Swap Helper + * + * This should all dead code away depending on the build host and + * access type. + */ -#define DATA_SIZE 1 -#include "softmmu_template.h" +static inline uint64_t handle_bswap(uint64_t val, int size, bool big_endian) +{ + if ((big_endian && NEED_BE_BSWAP) || (!big_endian && NEED_LE_BSWAP)) { + switch (size) { + case 1: return val; + case 2: return bswap16(val); + case 4: return bswap32(val); + case 8: return bswap64(val); + default: + g_assert_not_reached(); + } + } else { + return val; + } +} -#define DATA_SIZE 2 -#include "softmmu_template.h" +/* + * Load Helpers + * + * We support two different access types. SOFTMMU_CODE_ACCESS is + * specifically for reading instructions from system memory. It is + * called by the translation loop and in some helpers where the code + * is disassembled. It shouldn't be called directly by guest code. + */ -#define DATA_SIZE 4 -#include "softmmu_template.h" +typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); -#define DATA_SIZE 8 -#include "softmmu_template.h" +static inline uint64_t __attribute__((always_inline)) +load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, + uintptr_t retaddr, size_t size, bool big_endian, bool code_read, + FullLoadHelper *full_load) +{ + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; + const size_t tlb_off = code_read ? + offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); + const MMUAccessType access_type = + code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; + unsigned a_bits = get_alignment_bits(get_memop(oi)); + void *haddr; + uint64_t res; + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, access_type, + mmu_idx, retaddr); + } + + /* If the TLB entry is for a different page, reload and try again. */ + if (!tlb_hit(tlb_addr, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + addr & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), addr, size, + access_type, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = code_read ? entry->addr_code : entry->addr_read; + } + + /* Handle an IO access. 
*/ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if ((addr & (size - 1)) != 0) { + goto do_unaligned_access; + } + + if (tlb_addr & TLB_RECHECK) { + /* + * This is a TLB_RECHECK access, where the MMU protection + * covers a smaller range than a target page, and we must + * repeat the MMU check here. This tlb_fill() call might + * longjump out if this access should cause a guest exception. + */ + tlb_fill(ENV_GET_CPU(env), addr, size, + access_type, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + + tlb_addr = code_read ? entry->addr_code : entry->addr_read; + tlb_addr &= ~TLB_RECHECK; + if (!(tlb_addr & ~TARGET_PAGE_MASK)) { + /* RAM access */ + goto do_aligned_access; + } + } + + res = io_readx(env, &env->iotlb[mmu_idx][index], mmu_idx, addr, + retaddr, access_type, size); + return handle_bswap(res, size, big_endian); + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (size > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 + >= TARGET_PAGE_SIZE)) { + target_ulong addr1, addr2; + tcg_target_ulong r1, r2; + unsigned shift; + do_unaligned_access: + addr1 = addr & ~(size - 1); + addr2 = addr1 + size; + r1 = full_load(env, addr1, oi, retaddr); + r2 = full_load(env, addr2, oi, retaddr); + shift = (addr & (size - 1)) * 8; + + if (big_endian) { + /* Big-endian combine. */ + res = (r1 << shift) | (r2 >> ((size * 8) - shift)); + } else { + /* Little-endian combine. */ + res = (r1 >> shift) | (r2 << ((size * 8) - shift)); + } + return res & MAKE_64BIT_MASK(0, size * 8); + } + + do_aligned_access: + haddr = (void *)((uintptr_t)addr + entry->addend); + switch (size) { + case 1: + res = ldub_p(haddr); + break; + case 2: + if (big_endian) { + res = lduw_be_p(haddr); + } else { + res = lduw_le_p(haddr); + } + break; + case 4: + if (big_endian) { + res = (uint32_t)ldl_be_p(haddr); + } else { + res = (uint32_t)ldl_le_p(haddr); + } + break; + case 8: + if (big_endian) { + res = ldq_be_p(haddr); + } else { + res = ldq_le_p(haddr); + } + break; + default: + g_assert_not_reached(); + } + + return res; +} + +/* + * For the benefit of TCG generated code, we want to avoid the + * complication of ABI-specific return type promotion and always + * return a value extended to the register size of the host. This is + * tcg_target_long, except in the case of a 32-bit host and 64-bit + * data, and for that we always have uint64_t. + * + * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. 
+ */ + +static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 1, false, false, + full_ldub_mmu); +} + +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_ldub_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, false, false, + full_le_lduw_mmu); +} + +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_lduw_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, true, false, + full_be_lduw_mmu); +} + +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_lduw_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, false, false, + full_le_ldul_mmu); +} + +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_ldul_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, true, false, + full_be_ldul_mmu); +} + +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_ldul_mmu(env, addr, oi, retaddr); +} + +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, false, false, + helper_le_ldq_mmu); +} + +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, true, false, + helper_be_ldq_mmu); +} + +/* + * Provide signed versions of the load routines as well. We can of course + * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 
+ */ + + +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); +} + +/* + * Store Helpers + */ + +static inline void __attribute__((always_inline)) +store_helper(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr, size_t size, bool big_endian) +{ + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); + const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); + unsigned a_bits = get_alignment_bits(get_memop(oi)); + void *haddr; + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* If the TLB entry is for a different page, reload and try again. */ + if (!tlb_hit(tlb_addr, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + addr & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, + mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if ((addr & (size - 1)) != 0) { + goto do_unaligned_access; + } + + if (tlb_addr & TLB_RECHECK) { + /* + * This is a TLB_RECHECK access, where the MMU protection + * covers a smaller range than a target page, and we must + * repeat the MMU check here. This tlb_fill() call might + * longjump out if this access should cause a guest exception. + */ + tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, + mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + + tlb_addr = tlb_addr_write(entry); + tlb_addr &= ~TLB_RECHECK; + if (!(tlb_addr & ~TARGET_PAGE_MASK)) { + /* RAM access */ + goto do_aligned_access; + } + } + + io_writex(env, &env->iotlb[mmu_idx][index], mmu_idx, + handle_bswap(val, size, big_endian), + addr, retaddr, size); + return; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (size > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 + >= TARGET_PAGE_SIZE)) { + int i; + uintptr_t index2; + CPUTLBEntry *entry2; + target_ulong page2, tlb_addr2; + do_unaligned_access: + /* + * Ensure the second page is in the TLB. Note that the first page + * is already guaranteed to be filled, and that the second page + * cannot evict the first. 
+ */ + page2 = (addr + size) & TARGET_PAGE_MASK; + index2 = tlb_index(env, mmu_idx, page2); + entry2 = tlb_entry(env, mmu_idx, page2); + tlb_addr2 = tlb_addr_write(entry2); + if (!tlb_hit_page(tlb_addr2, page2) + && !victim_tlb_hit(env, mmu_idx, index2, tlb_off, + page2 & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), page2, size, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* + * XXX: not efficient, but simple. + * This loop must go in the forward direction to avoid issues + * with self-modifying code in Windows 64-bit. + */ + for (i = 0; i < size; ++i) { + uint8_t val8; + if (big_endian) { + /* Big-endian extract. */ + val8 = val >> (((size - 1) * 8) - (i * 8)); + } else { + /* Little-endian extract. */ + val8 = val >> (i * 8); + } + helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); + } + return; + } + + do_aligned_access: + haddr = (void *)((uintptr_t)addr + entry->addend); + switch (size) { + case 1: + stb_p(haddr, val); + break; + case 2: + if (big_endian) { + stw_be_p(haddr, val); + } else { + stw_le_p(haddr, val); + } + break; + case 4: + if (big_endian) { + stl_be_p(haddr, val); + } else { + stl_le_p(haddr, val); + } + break; + case 8: + if (big_endian) { + stq_be_p(haddr, val); + } else { + stq_le_p(haddr, val); + } + break; + default: + g_assert_not_reached(); + break; + } +} + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 1, false); +} + +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 2, false); +} + +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 2, true); +} + +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 4, false); +} + +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 4, true); +} + +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 8, false); +} + +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 8, true); +} /* First set of helpers allows passing in of OI and RETADDR. This makes them callable from other helpers. */ @@ -1248,20 +1653,81 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* Code access functions. 
*/ -#undef MMUSUFFIX -#define MMUSUFFIX _cmmu -#undef GETPC -#define GETPC() ((uintptr_t)0) -#define SOFTMMU_CODE_ACCESS +static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 1, false, true, + full_ldub_cmmu); +} -#define DATA_SIZE 1 -#include "softmmu_template.h" +uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_ldub_cmmu(env, addr, oi, retaddr); +} -#define DATA_SIZE 2 -#include "softmmu_template.h" +static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, false, true, + full_le_lduw_cmmu); +} -#define DATA_SIZE 4 -#include "softmmu_template.h" +uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_lduw_cmmu(env, addr, oi, retaddr); +} -#define DATA_SIZE 8 -#include "softmmu_template.h" +static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, true, true, + full_be_lduw_cmmu); +} + +uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_lduw_cmmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, false, true, + full_le_ldul_cmmu); +} + +uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_ldul_cmmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, true, true, + full_be_ldul_cmmu); +} + +uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_ldul_cmmu(env, addr, oi, retaddr); +} + +uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, false, true, + helper_le_ldq_cmmu); +} + +uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, true, true, + helper_be_ldq_cmmu); +} diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h deleted file mode 100644 index e970a8b378..0000000000 --- a/accel/tcg/softmmu_template.h +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Software MMU support - * - * Generate helpers used by TCG for qemu_ld/st ops and code load - * functions. - * - * Included from target op helpers and exec.c. - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ -#if DATA_SIZE == 8 -#define SUFFIX q -#define LSUFFIX q -#define SDATA_TYPE int64_t -#define DATA_TYPE uint64_t -#elif DATA_SIZE == 4 -#define SUFFIX l -#define LSUFFIX l -#define SDATA_TYPE int32_t -#define DATA_TYPE uint32_t -#elif DATA_SIZE == 2 -#define SUFFIX w -#define LSUFFIX uw -#define SDATA_TYPE int16_t -#define DATA_TYPE uint16_t -#elif DATA_SIZE == 1 -#define SUFFIX b -#define LSUFFIX ub -#define SDATA_TYPE int8_t -#define DATA_TYPE uint8_t -#else -#error unsupported data size -#endif - - -/* For the benefit of TCG generated code, we want to avoid the complication - of ABI-specific return type promotion and always return a value extended - to the register size of the host. This is tcg_target_long, except in the - case of a 32-bit host and 64-bit data, and for that we always have - uint64_t. Don't bother with this widened value for SOFTMMU_CODE_ACCESS. */ -#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8 -# define WORD_TYPE DATA_TYPE -# define USUFFIX SUFFIX -#else -# define WORD_TYPE tcg_target_ulong -# define USUFFIX glue(u, SUFFIX) -# define SSUFFIX glue(s, SUFFIX) -#endif - -#ifdef SOFTMMU_CODE_ACCESS -#define READ_ACCESS_TYPE MMU_INST_FETCH -#define ADDR_READ addr_code -#else -#define READ_ACCESS_TYPE MMU_DATA_LOAD -#define ADDR_READ addr_read -#endif - -#if DATA_SIZE == 8 -# define BSWAP(X) bswap64(X) -#elif DATA_SIZE == 4 -# define BSWAP(X) bswap32(X) -#elif DATA_SIZE == 2 -# define BSWAP(X) bswap16(X) -#else -# define BSWAP(X) (X) -#endif - -#if DATA_SIZE == 1 -# define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) -# define helper_be_ld_name helper_le_ld_name -# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX) -# define helper_be_lds_name helper_le_lds_name -# define helper_le_st_name glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX) -# define helper_be_st_name helper_le_st_name -#else -# define helper_le_ld_name glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX) -# define helper_be_ld_name glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX) -# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX) -# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX) -# define helper_le_st_name glue(glue(helper_le_st, SUFFIX), MMUSUFFIX) -# define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) -#endif - -#ifndef SOFTMMU_CODE_ACCESS -static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, - size_t mmu_idx, size_t index, - target_ulong addr, - uintptr_t retaddr, - bool recheck, - MMUAccessType access_type) -{ - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; - return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck, - access_type, DATA_SIZE); -} -#endif - -WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->ADDR_READ; - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - DATA_TYPE res; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. 
*/ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(ADDR_READ, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = entry->ADDR_READ; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr, - tlb_addr & TLB_RECHECK, - READ_ACCESS_TYPE); - res = TGT_LE(res); - return res; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - DATA_TYPE res1, res2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~(DATA_SIZE - 1); - addr2 = addr1 + DATA_SIZE; - res1 = helper_le_ld_name(env, addr1, oi, retaddr); - res2 = helper_le_ld_name(env, addr2, oi, retaddr); - shift = (addr & (DATA_SIZE - 1)) * 8; - - /* Little-endian combine. */ - res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); - return res; - } - - haddr = addr + entry->addend; -#if DATA_SIZE == 1 - res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); -#else - res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr); -#endif - return res; -} - -#if DATA_SIZE > 1 -WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->ADDR_READ; - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - DATA_TYPE res; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(ADDR_READ, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = entry->ADDR_READ; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr, - tlb_addr & TLB_RECHECK, - READ_ACCESS_TYPE); - res = TGT_BE(res); - return res; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - DATA_TYPE res1, res2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~(DATA_SIZE - 1); - addr2 = addr1 + DATA_SIZE; - res1 = helper_be_ld_name(env, addr1, oi, retaddr); - res2 = helper_be_ld_name(env, addr2, oi, retaddr); - shift = (addr & (DATA_SIZE - 1)) * 8; - - /* Big-endian combine. 
*/ - res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); - return res; - } - - haddr = addr + entry->addend; - res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); - return res; -} -#endif /* DATA_SIZE > 1 */ - -#ifndef SOFTMMU_CODE_ACCESS - -/* Provide signed versions of the load routines as well. We can of course - avoid this for 64-bit data, or for 32-bit data on 32-bit host. */ -#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS -WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - return (SDATA_TYPE)helper_le_ld_name(env, addr, oi, retaddr); -} - -# if DATA_SIZE > 1 -WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - return (SDATA_TYPE)helper_be_ld_name(env, addr, oi, retaddr); -} -# endif -#endif - -static inline void glue(io_write, SUFFIX)(CPUArchState *env, - size_t mmu_idx, size_t index, - DATA_TYPE val, - target_ulong addr, - uintptr_t retaddr, - bool recheck) -{ - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; - return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, - recheck, DATA_SIZE); -} - -void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(entry); - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - val = TGT_LE(val); - glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, - retaddr, tlb_addr & TLB_RECHECK); - return; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - int i; - target_ulong page2; - CPUTLBEntry *entry2; - do_unaligned_access: - /* Ensure the second page is in the TLB. Note that the first page - is already guaranteed to be filled, and that the second page - cannot evict the first. */ - page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(tlb_addr_write(entry2), page2) - && !VICTIM_TLB_HIT(addr_write, page2)) { - tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* XXX: not efficient, but simple. */ - /* This loop must go in the forward direction to avoid issues - with self-modifying code in Windows 64-bit. */ - for (i = 0; i < DATA_SIZE; ++i) { - /* Little-endian extract. 
*/ - uint8_t val8 = val >> (i * 8); - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr); - } - return; - } - - haddr = addr + entry->addend; -#if DATA_SIZE == 1 - glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); -#else - glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val); -#endif -} - -#if DATA_SIZE > 1 -void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(entry); - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - val = TGT_BE(val); - glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr, - tlb_addr & TLB_RECHECK); - return; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - int i; - target_ulong page2; - CPUTLBEntry *entry2; - do_unaligned_access: - /* Ensure the second page is in the TLB. Note that the first page - is already guaranteed to be filled, and that the second page - cannot evict the first. */ - page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(tlb_addr_write(entry2), page2) - && !VICTIM_TLB_HIT(addr_write, page2)) { - tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* XXX: not efficient, but simple */ - /* This loop must go in the forward direction to avoid issues - with self-modifying code. */ - for (i = 0; i < DATA_SIZE; ++i) { - /* Big-endian extract. 
*/ - uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr); - } - return; - } - - haddr = addr + entry->addend; - glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); -} -#endif /* DATA_SIZE > 1 */ -#endif /* !defined(SOFTMMU_CODE_ACCESS) */ - -#undef READ_ACCESS_TYPE -#undef DATA_TYPE -#undef SUFFIX -#undef LSUFFIX -#undef DATA_SIZE -#undef ADDR_READ -#undef WORD_TYPE -#undef SDATA_TYPE -#undef USUFFIX -#undef SSUFFIX -#undef BSWAP -#undef helper_le_ld_name -#undef helper_be_ld_name -#undef helper_le_lds_name -#undef helper_be_lds_name -#undef helper_le_st_name -#undef helper_be_st_name @@ -4082,14 +4082,14 @@ static void bdrv_delete(BlockDriverState *bs) assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); - bdrv_close(bs); - /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); } QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); + bdrv_close(bs); + g_free(bs); } @@ -4121,7 +4121,7 @@ typedef struct CheckCo { int ret; } CheckCo; -static void bdrv_check_co_entry(void *opaque) +static void coroutine_fn bdrv_check_co_entry(void *opaque) { CheckCo *cco = opaque; cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix); diff --git a/block/io.c b/block/io.c index dfc153b8d8..aeebc9c23c 100644 --- a/block/io.c +++ b/block/io.c @@ -837,42 +837,6 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, return rwco.ret; } -/* - * Process a synchronous request using coroutines - */ -static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf, - int nb_sectors, bool is_write, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, - nb_sectors * BDRV_SECTOR_SIZE); - - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS, - &qiov, is_write, flags); -} - -/* return < 0 if error. See bdrv_write() for the return codes */ -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0); -} - -/* Return < 0 if error. Important errors are: - -EIO generic I/O error (may happen for all errors) - -ENOMEDIUM No media inserted. - -EINVAL Invalid sector number or nb_sectors - -EACCES Trying to write a read-only device -*/ -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0); -} - int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -935,6 +899,7 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* See bdrv_pwrite() for the return codes */ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); @@ -958,6 +923,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* Return no. of bytes on success or < 0 on error. Important errors are: + -EIO generic I/O error (may happen for all errors) + -ENOMEDIUM No media inserted. 
+ -EINVAL Invalid offset or number of bytes + -EACCES Trying to write a read-only device +*/ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); @@ -1516,7 +1487,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, assert(!bs->supported_zero_flags); } - if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) { + if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) { /* Fall back to bounce buffer if write zeroes is unsupported */ BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index fa7ac1f7cb..7481903396 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -2429,8 +2429,8 @@ write_refblocks: on_disk_refblock = (void *)((char *) *refcount_table + refblock_index * s->cluster_size); - ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE, - on_disk_refblock, s->cluster_sectors); + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); if (ret < 0) { fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); goto fail; diff --git a/block/qcow2.c b/block/qcow2.c index a520d116ef..8e024007db 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1259,7 +1259,6 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); /* Initialise version 3 header fields */ if (header.version == 2) { diff --git a/block/qcow2.h b/block/qcow2.h index fdee297f33..e62508d1ce 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -266,7 +266,6 @@ typedef struct Qcow2BitmapHeaderExt { typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; - int cluster_sectors; int l2_slice_size; int l2_bits; int l2_size; diff --git a/block/vdi.c b/block/vdi.c index e1c42ad732..d7ef6628e7 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -171,6 +171,8 @@ typedef struct { uint64_t unused2[7]; } QEMU_PACKED VdiHeader; +QEMU_BUILD_BUG_ON(sizeof(VdiHeader) != 512); + typedef struct { /* The block map entries are little endian (even in memory). */ uint32_t *bmap; @@ -384,7 +386,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, logout("\n"); - ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { goto fail; } @@ -484,8 +486,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, - bmap_size); + ret = bdrv_pread(bs->file, header.offset_bmap, s->bmap, + bmap_size * SECTOR_SIZE); if (ret < 0) { goto fail_free_bmap; } @@ -704,7 +706,7 @@ nonallocating_write: assert(VDI_IS_ALLOCATED(bmap_first)); *header = s->header; vdi_header_to_le(header); - ret = bdrv_write(bs->file, 0, block, 1); + ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader)); g_free(block); block = NULL; @@ -722,10 +724,11 @@ nonallocating_write: base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; logout("will write %u block map sectors starting from entry %u\n", n_sectors, bmap_first); - ret = bdrv_write(bs->file, offset, base, n_sectors); + ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, base, + n_sectors * SECTOR_SIZE); } - return ret; + return ret < 0 ? 
ret : 0; } static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, diff --git a/block/vvfat.c b/block/vvfat.c index 5f66787890..253cc716dd 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1494,8 +1494,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64 " allocated\n", sector_num, n >> BDRV_SECTOR_BITS)); - if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, - n >> BDRV_SECTOR_BITS)) { + if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, + buf + i * 0x200, n) < 0) { return -1; } i += (n >> BDRV_SECTOR_BITS) - 1; @@ -1983,8 +1983,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, if (res) { return -1; } - res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1); - if (res) { + res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE, + s->cluster_buffer, BDRV_SECTOR_SIZE); + if (res < 0) { return -2; } } @@ -3050,7 +3051,8 @@ DLOG(checkpoint()); * Use qcow backend. Commit later. */ DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); - ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors); + ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, buf, + nb_sectors * BDRV_SECTOR_SIZE); if (ret < 0) { fprintf(stderr, "Error writing to qcow backend\n"); return ret; diff --git a/docs/devel/index.rst b/docs/devel/index.rst index ebbab636ce..2a4ddf40ad 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -20,3 +20,4 @@ Contents: stable-process testing decodetree + secure-coding-practices diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst new file mode 100644 index 0000000000..cbfc8af67e --- /dev/null +++ b/docs/devel/secure-coding-practices.rst @@ -0,0 +1,106 @@ +======================= +Secure Coding Practices +======================= +This document covers topics that both developers and security researchers must +be aware of so that they can develop safe code and audit existing code +properly. + +Reporting Security Bugs +----------------------- +For details on how to report security bugs or ask questions about potential +security bugs, see the `Security Process wiki page +<https://wiki.qemu.org/SecurityProcess>`_. + +General Secure C Coding Practices +--------------------------------- +Most CVEs (security bugs) reported against QEMU are not specific to +virtualization or emulation. They are simply C programming bugs. Therefore +it's critical to be aware of common classes of security bugs. + +There is a wide selection of resources available covering secure C coding. For +example, the `CERT C Coding Standard +<https://wiki.sei.cmu.edu/confluence/display/c/SEI+CERT+C+Coding+Standard>`_ +covers the most important classes of security bugs. + +Instead of describing them in detail here, only the names of the most important +classes of security bugs are mentioned: + +* Buffer overflows +* Use-after-free and double-free +* Integer overflows +* Format string vulnerabilities + +Some of these classes of bugs can be detected by analyzers. Static analysis is +performed regularly by Coverity and the most obvious of these bugs are even +reported by compilers. Dynamic analysis is possible with valgrind, tsan, and +asan. + +Input Validation +---------------- +Inputs from the guest or external sources (e.g. network, files) cannot be +trusted and may be invalid. 
Inputs must be checked before using them in a way +that could crash the program, expose host memory to the guest, or otherwise be +exploitable by an attacker. + +The most sensitive attack surface is device emulation. All hardware register +accesses and data read from guest memory must be validated. A typical example +is a device that contains multiple units that are selectable by the guest via +an index register:: + + typedef struct { + ProcessingUnit unit[2]; + ... + } MyDeviceState; + + static void mydev_writel(void *opaque, uint32_t addr, uint32_t val) + { + MyDeviceState *mydev = opaque; + ProcessingUnit *unit; + + switch (addr) { + case MYDEV_SELECT_UNIT: + unit = &mydev->unit[val]; <-- this input wasn't validated! + ... + } + } + +If ``val`` is not in range [0, 1] then an out-of-bounds memory access will take +place when ``unit`` is dereferenced. The code must check that ``val`` is 0 or +1 and handle the case where it is invalid. + +Unexpected Device Accesses +-------------------------- +The guest may access device registers in unusual orders or at unexpected +moments. Device emulation code must not assume that the guest follows the +typical "theory of operation" presented in driver writer manuals. The guest +may make nonsense accesses to device registers such as starting operations +before the device has been fully initialized. + +A related issue is that device emulation code must be prepared for unexpected +device register accesses while asynchronous operations are in progress. A +well-behaved guest might wait for a completion interrupt before accessing +certain device registers. Device emulation code must handle the case where the +guest overwrites registers or submits further requests before an ongoing +request completes. Unexpected accesses must not cause memory corruption or +leaks in QEMU. + +Invalid device register accesses can be reported with +``qemu_log_mask(LOG_GUEST_ERROR, ...)``. The ``-d guest_errors`` command-line +option enables these log messages. + +Live Migration +-------------- +Device state can be saved to disk image files and shared with other users. +Live migration code must validate inputs when loading device state so an +attacker cannot gain control by crafting invalid device states. Device state +is therefore considered untrusted even though it is typically generated by QEMU +itself. + +Guest Memory Access Races +------------------------- +Guests with multiple vCPUs may modify guest RAM while device emulation code is +running. Device emulation code must copy in descriptors and other guest RAM +structures and only process the local copy. This prevents +time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to +crash when a vCPU thread modifies guest RAM while device emulation is +processing it. diff --git a/docs/security.texi b/docs/security.texi new file mode 100644 index 0000000000..927764f1e6 --- /dev/null +++ b/docs/security.texi @@ -0,0 +1,131 @@ +@node Security +@chapter Security + +@section Overview + +This chapter explains the security requirements that QEMU is designed to meet +and principles for securely deploying QEMU. + +@section Security Requirements + +QEMU supports many different use cases, some of which have stricter security +requirements than others. The community has agreed on the overall security +requirements that users may depend on. These requirements define what is +considered supported from a security perspective. 
+ +@subsection Virtualization Use Case + +The virtualization use case covers cloud and virtual private server (VPS) +hosting, as well as traditional data center and desktop virtualization. These +use cases rely on hardware virtualization extensions to execute guest code +safely on the physical CPU at close-to-native speed. + +The following entities are untrusted, meaning that they may be buggy or +malicious: + +@itemize +@item Guest +@item User-facing interfaces (e.g. VNC, SPICE, WebSocket) +@item Network protocols (e.g. NBD, live migration) +@item User-supplied files (e.g. disk images, kernels, device trees) +@item Passthrough devices (e.g. PCI, USB) +@end itemize + +Bugs affecting these entities are evaluated on whether they can cause damage in +real-world use cases and treated as security bugs if this is the case. + +@subsection Non-virtualization Use Case + +The non-virtualization use case covers emulation using the Tiny Code Generator +(TCG). In principle the TCG and device emulation code used in conjunction with +the non-virtualization use case should meet the same security requirements as +the virtualization use case. However, for historical reasons much of the +non-virtualization use case code was not written with these security +requirements in mind. + +Bugs affecting the non-virtualization use case are not considered security +bugs at this time. Users with non-virtualization use cases must not rely on +QEMU to provide guest isolation or any security guarantees. + +@section Architecture + +This section describes the design principles that ensure the security +requirements are met. + +@subsection Guest Isolation + +Guest isolation is the confinement of guest code to the virtual machine. When +guest code gains control of execution on the host this is called escaping the +virtual machine. Isolation also includes resource limits such as throttling of +CPU, memory, disk, or network. Guests must be unable to exceed their resource +limits. + +QEMU presents an attack surface to the guest in the form of emulated devices. +The guest must not be able to gain control of QEMU. Bugs in emulated devices +could allow malicious guests to gain code execution in QEMU. At this point the +guest has escaped the virtual machine and is able to act in the context of the +QEMU process on the host. + +Guests often interact with other guests and share resources with them. A +malicious guest must not gain control of other guests or access their data. +Disk image files and network traffic must be protected from other guests unless +explicitly shared between them by the user. + +@subsection Principle of Least Privilege + +The principle of least privilege states that each component only has access to +the privileges necessary for its function. In the case of QEMU this means that +each process only has access to resources belonging to the guest. + +The QEMU process should not have access to any resources that are inaccessible +to the guest. This way the guest does not gain anything by escaping into the +QEMU process since it already has access to those same resources from within +the guest. + +Following the principle of least privilege immediately fulfills guest isolation +requirements. For example, guest A only has access to its own disk image file +@code{a.img} and not guest B's disk image file @code{b.img}. + +In reality certain resources are inaccessible to the guest but must be +available to QEMU to perform its function. For example, host system calls are +necessary for QEMU but are not exposed to guests. 
A guest that escapes into +the QEMU process can then begin invoking host system calls. + +New features must be designed to follow the principle of least privilege. +Should this not be possible for technical reasons, the security risk must be +clearly documented so users are aware of the trade-off of enabling the feature. + +@subsection Isolation mechanisms + +Several isolation mechanisms are available to realize this architecture of +guest isolation and the principle of least privilege. With the exception of +Linux seccomp, these mechanisms are all deployed by management tools that +launch QEMU, such as libvirt. They are also platform-specific so they are only +described briefly for Linux here. + +The fundamental isolation mechanism is that QEMU processes must run as +unprivileged users. Sometimes it seems more convenient to launch QEMU as +root to give it access to host devices (e.g. @code{/dev/net/tun}) but this poses a +huge security risk. File descriptor passing can be used to give an otherwise +unprivileged QEMU process access to host devices without running QEMU as root. +It is also possible to launch QEMU as a non-root user and configure UNIX groups +for access to @code{/dev/kvm}, @code{/dev/net/tun}, and other device nodes. +Some Linux distros already ship with UNIX groups for these devices by default. + +@itemize +@item SELinux and AppArmor make it possible to confine processes beyond the +traditional UNIX process and file permissions model. They restrict the QEMU +process from accessing processes and files on the host system that are not +needed by QEMU. + +@item Resource limits and cgroup controllers provide throughput and utilization +limits on key resources such as CPU time, memory, and I/O bandwidth. + +@item Linux namespaces can be used to make process, file system, and other system +resources unavailable to QEMU. A namespaced QEMU process is restricted to only +those resources that were granted to it. + +@item Linux seccomp is available via the QEMU @option{--sandbox} option. It disables +system calls that are not needed by QEMU, thereby reducing the host kernel +attack surface. 
+@end itemize diff --git a/include/block/block.h b/include/block/block.h index c7a26199aa..5e2b98b0ee 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -316,10 +316,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); void bdrv_reopen_commit(BDRVReopenState *reopen_state); void bdrv_reopen_abort(BDRVReopenState *reopen_state); -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors); -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors); int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); @@ -432,7 +432,7 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job)) timer_del(&job->sleep_timer); job->busy = true; job_unlock(); - aio_co_wake(job->co); + aio_co_enter(job->aio_context, job->co); } void job_enter(Job *job) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index c1a26021f8..ef42e02d82 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2366,11 +2366,19 @@ static void load_elf_image(const char *image_name, int image_fd, vaddr_ps = TARGET_ELF_PAGESTART(vaddr); vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po); - error = target_mmap(vaddr_ps, vaddr_len, - elf_prot, MAP_PRIVATE | MAP_FIXED, - image_fd, eppnt->p_offset - vaddr_po); - if (error == -1) { - goto exit_perror; + /* + * Some segments may be completely empty without any backing file + * segment, in that case just let zero_bss allocate an empty buffer + * for it. + */ + if (eppnt->p_filesz != 0) { + error = target_mmap(vaddr_ps, vaddr_len, elf_prot, + MAP_PRIVATE | MAP_FIXED, + image_fd, eppnt->p_offset - vaddr_po); + + if (error == -1) { + goto exit_perror; + } } vaddr_ef = vaddr + eppnt->p_filesz; @@ -2872,7 +2880,7 @@ struct target_elf_prpsinfo { target_gid_t pr_gid; target_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; /* Lots missing */ - char pr_fname[16]; /* filename of executable */ + char pr_fname[16] QEMU_NONSTRING; /* filename of executable */ char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; diff --git a/linux-user/exit.c b/linux-user/exit.c index 14e94e28fa..bdda720553 100644 --- a/linux-user/exit.c +++ b/linux-user/exit.c @@ -18,6 +18,9 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#ifdef TARGET_GPROF +#include <sys/gmon.h> +#endif #ifdef CONFIG_GCOV extern void __gcov_dump(void); diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h index ae8951625f..37501f575c 100644 --- a/linux-user/ioctls.h +++ b/linux-user/ioctls.h @@ -178,7 +178,7 @@ #endif /* CONFIG_USBFS */ IOCTL(SIOCATMARK, IOC_R, MK_PTR(TYPE_INT)) - IOCTL(SIOCGIFNAME, IOC_RW, MK_PTR(TYPE_INT)) + IOCTL(SIOCGIFNAME, IOC_RW, MK_PTR(MK_STRUCT(STRUCT_int_ifreq))) IOCTL(SIOCGIFFLAGS, IOC_W | IOC_R, MK_PTR(MK_STRUCT(STRUCT_short_ifreq))) IOCTL(SIOCSIFFLAGS, IOC_W, MK_PTR(MK_STRUCT(STRUCT_short_ifreq))) IOCTL(SIOCGIFADDR, IOC_W | IOC_R, MK_PTR(MK_STRUCT(STRUCT_sockaddr_ifreq))) diff --git a/linux-user/signal.c b/linux-user/signal.c index e2c0b37173..44b2d3b35a 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -508,6 +508,11 @@ void signal_init(void) act.sa_flags = SA_SIGINFO; act.sa_sigaction = host_signal_handler; for(i = 1; i <= TARGET_NSIG; i++) { +#ifdef TARGET_GPROF + if (i == SIGPROF) { + continue; + } +#endif host_sig = target_to_host_signal(i); sigaction(host_sig, NULL, &oact); if (oact.sa_sigaction == (void *)SIG_IGN) { diff --git a/linux-user/syscall.c b/linux-user/syscall.c 
index 96cd4bf86d..f5ff6f5dc8 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -59,9 +59,6 @@ #ifdef CONFIG_TIMERFD #include <sys/timerfd.h> #endif -#ifdef TARGET_GPROF -#include <sys/gmon.h> -#endif #ifdef CONFIG_EVENTFD #include <sys/eventfd.h> #endif @@ -1864,6 +1861,28 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, case IPV6_RECVHOPLIMIT: case IPV6_2292HOPLIMIT: case IPV6_CHECKSUM: + case IPV6_ADDRFORM: + case IPV6_2292PKTINFO: + case IPV6_RECVTCLASS: + case IPV6_RECVRTHDR: + case IPV6_2292RTHDR: + case IPV6_RECVHOPOPTS: + case IPV6_2292HOPOPTS: + case IPV6_RECVDSTOPTS: + case IPV6_2292DSTOPTS: + case IPV6_TCLASS: +#ifdef IPV6_RECVPATHMTU + case IPV6_RECVPATHMTU: +#endif +#ifdef IPV6_TRANSPARENT + case IPV6_TRANSPARENT: +#endif +#ifdef IPV6_FREEBIND + case IPV6_FREEBIND: +#endif +#ifdef IPV6_RECVORIGDSTADDR + case IPV6_RECVORIGDSTADDR: +#endif val = 0; if (optlen < sizeof(uint32_t)) { return -TARGET_EINVAL; } @@ -2358,6 +2377,28 @@ static abi_long do_getsockopt(int sockfd, int level, int optname, case IPV6_RECVHOPLIMIT: case IPV6_2292HOPLIMIT: case IPV6_CHECKSUM: + case IPV6_ADDRFORM: + case IPV6_2292PKTINFO: + case IPV6_RECVTCLASS: + case IPV6_RECVRTHDR: + case IPV6_2292RTHDR: + case IPV6_RECVHOPOPTS: + case IPV6_2292HOPOPTS: + case IPV6_RECVDSTOPTS: + case IPV6_2292DSTOPTS: + case IPV6_TCLASS: +#ifdef IPV6_RECVPATHMTU + case IPV6_RECVPATHMTU: +#endif +#ifdef IPV6_TRANSPARENT + case IPV6_TRANSPARENT: +#endif +#ifdef IPV6_FREEBIND + case IPV6_FREEBIND: +#endif +#ifdef IPV6_RECVORIGDSTADDR + case IPV6_RECVORIGDSTADDR: +#endif if (get_user_u32(len, optlen)) return -TARGET_EFAULT; if (len < 0) diff --git a/linux-user/uname.c b/linux-user/uname.c index 313b79dbad..1c05f95387 100644 --- a/linux-user/uname.c +++ b/linux-user/uname.c @@ -72,9 +72,8 @@ const char *cpu_to_uname_machine(void *cpu_env) #define COPY_UTSNAME_FIELD(dest, src) \ do { \ - /* __NEW_UTS_LEN doesn't include terminating null */ \ - (void) strncpy((dest), (src), __NEW_UTS_LEN); \ - (dest)[__NEW_UTS_LEN] = '\0'; \ + memcpy((dest), (src), MIN(sizeof(src), sizeof(dest))); \ + (dest)[sizeof(dest) - 1] = '\0'; \ } while (0) int sys_uname(struct new_utsname *buf) diff --git a/qemu-doc.texi b/qemu-doc.texi index ae3c3f9632..577d1e8376 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -38,6 +38,7 @@ * QEMU Guest Agent:: * QEMU User space emulator:: * System requirements:: +* Security:: * Implementation notes:: * Deprecated features:: * Supported build platforms:: @@ -2878,6 +2879,8 @@ added with Linux 4.5 which is supported by the major distros. And even if RHEL7 has kernel 3.10, KVM there has the required functionality to make it close to a 4.5 or newer kernel. +@include docs/security.texi @include qemu-tech.texi @include qemu-deprecated.texi diff --git a/qemu-img.c b/qemu-img.c index e6ad5978e0..28fba1e7a7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -37,6 +37,7 @@ #include "qemu/option.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/units.h" #include "qom/object_interfaces.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -1216,7 +1217,7 @@ static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2, return res; } -#define IO_BUF_SIZE (2 * 1024 * 1024) +#define IO_BUF_SIZE (2 * MiB) /* * Check if passed sectors are empty (not allocated or contain only 0 bytes) @@ -2960,7 +2961,7 @@ static int img_map(int argc, char **argv) int64_t n; /* Probe up to 1 GiB at a time. 
*/ - n = MIN(1 << 30, length - offset); + n = MIN(1 * GiB, length - offset); ret = get_block_status(bs, offset, n, &next); if (ret < 0) { @@ -3311,26 +3312,30 @@ static int img_rebase(int argc, char **argv) char backing_name[PATH_MAX]; QDict *options = NULL; - if (bs->backing_format[0] != '\0') { - options = qdict_new(); - qdict_put_str(options, "driver", bs->backing_format); - } - - if (force_share) { - if (!options) { + if (bs->backing) { + if (bs->backing_format[0] != '\0') { options = qdict_new(); + qdict_put_str(options, "driver", bs->backing_format); } - qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); - } - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - blk_old_backing = blk_new_open(backing_name, NULL, - options, src_flags, &local_err); - if (!blk_old_backing) { - error_reportf_err(local_err, - "Could not open old backing file '%s': ", - backing_name); - ret = -1; - goto out; + + if (force_share) { + if (!options) { + options = qdict_new(); + } + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); + } + bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); + blk_old_backing = blk_new_open(backing_name, NULL, + options, src_flags, &local_err); + if (!blk_old_backing) { + error_reportf_err(local_err, + "Could not open old backing file '%s': ", + backing_name); + ret = -1; + goto out; + } + } else { + blk_old_backing = NULL; } if (out_baseimg[0]) { @@ -3383,7 +3388,7 @@ static int img_rebase(int argc, char **argv) */ if (!unsafe) { int64_t size; - int64_t old_backing_size; + int64_t old_backing_size = 0; int64_t new_backing_size = 0; uint64_t offset; int64_t n; @@ -3399,15 +3404,18 @@ static int img_rebase(int argc, char **argv) ret = -1; goto out; } - old_backing_size = blk_getlength(blk_old_backing); - if (old_backing_size < 0) { - char backing_name[PATH_MAX]; + if (blk_old_backing) { + old_backing_size = blk_getlength(blk_old_backing); + if (old_backing_size < 0) { + char backing_name[PATH_MAX]; - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - error_report("Could not get size of '%s': %s", - backing_name, strerror(-old_backing_size)); - ret = -1; - goto out; + bdrv_get_backing_filename(bs, backing_name, + sizeof(backing_name)); + error_report("Could not get size of '%s': %s", + backing_name, strerror(-old_backing_size)); + ret = -1; + goto out; + } } if (blk_new_backing) { new_backing_size = blk_getlength(blk_new_backing); @@ -3424,6 +3432,8 @@ static int img_rebase(int argc, char **argv) } for (offset = 0; offset < size; offset += n) { + bool buf_old_is_zero = false; + /* How many bytes can we handle with the next read? 
*/ n = MIN(IO_BUF_SIZE, size - offset); @@ -3444,6 +3454,7 @@ static int img_rebase(int argc, char **argv) */ if (offset >= old_backing_size) { memset(buf_old, 0, n); + buf_old_is_zero = true; } else { if (offset + n > old_backing_size) { n = old_backing_size - offset; @@ -3479,8 +3490,12 @@ static int img_rebase(int argc, char **argv) if (compare_buffers(buf_old + written, buf_new + written, n - written, &pnum)) { - ret = blk_pwrite(blk, offset + written, - buf_old + written, pnum, 0); + if (buf_old_is_zero) { + ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0); + } else { + ret = blk_pwrite(blk, offset + written, + buf_old + written, pnum, 0); + } if (ret < 0) { error_report("Error while writing to COW image: %s", strerror(-ret)); diff --git a/tests/qemu-iotests/192 b/tests/qemu-iotests/192 index 158086f9d2..61a88ac88d 100755 --- a/tests/qemu-iotests/192 +++ b/tests/qemu-iotests/192 @@ -29,7 +29,9 @@ status=1 # failure is the default! _cleanup() { - _cleanup_test_img + _cleanup_qemu + _cleanup_test_img + rm -f "$TEST_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 diff --git a/tests/qemu-iotests/252 b/tests/qemu-iotests/252 new file mode 100755 index 0000000000..f6c8f71444 --- /dev/null +++ b/tests/qemu-iotests/252 @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# +# Tests for rebasing COW images that require zero cluster support +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq=$(basename $0) +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.base_new" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# Currently only qcow2 and qed support rebasing, and only qcow2 v3 has +# zero cluster support +_supported_fmt qcow2 +_unsupported_imgopts 'compat=0.10' +_supported_proto file +_supported_os Linux + +CLUSTER_SIZE=65536 + +echo +echo "=== Test rebase without input base ===" +echo + +# Cluster allocations to be tested: +# +# Backing (new) 11 -- 11 -- 11 -- +# COW image 22 22 11 11 -- -- +# +# Expected result: +# +# COW image 22 22 11 11 00 -- +# +# (Cluster 2 might be "--" after the rebase, too, but rebase just +# compares the new backing file to the old one and disregards the +# overlay. Therefore, it will never discard overlay clusters.) 
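+#
+# (Rough sketch of what "qemu-img rebase" does per cluster, as exercised
+# by this test; this is pseudocode, not qemu-img's literal implementation:
+#
+#   for each cluster of the COW image:
+#       if the cluster is allocated in the COW image:
+#           skip it -- its data overrides any backing file
+#       else if old backing contents != new backing contents there:
+#           write the old contents into the COW image, as a zero
+#           cluster when the old backing data read back as all zeroes
+#
+# Reads past the end of either backing file count as zeroes.)
+#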
+ +_make_test_img $((6 * CLUSTER_SIZE)) +TEST_IMG="$TEST_IMG.base_new" _make_test_img $((6 * CLUSTER_SIZE)) + +echo + +$QEMU_IO "$TEST_IMG" \ + -c "write -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +$QEMU_IO "$TEST_IMG.base_new" \ + -c "write -P 0x11 $((0 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((4 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + | _filter_qemu_io + +echo + +# This should be a no-op +$QEMU_IMG rebase -b "" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four clusters should be allocated +# in TEST_IMG, clusters 4 and 5 should be unallocated (marked as zero +# clusters here because there is no backing file)) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +echo + +$QEMU_IMG rebase -b "$TEST_IMG.base_new" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four clusters should be allocated +# in TEST_IMG, cluster 4 should be zero, and cluster 5 should be +# unallocated (signified by '"depth": 1')) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/252.out b/tests/qemu-iotests/252.out new file mode 100644 index 0000000000..12dce889f8 --- /dev/null +++ b/tests/qemu-iotests/252.out @@ -0,0 +1,39 @@ +QA output created by 252 + +=== Test rebase without input base === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=393216 +Formatting 'TEST_DIR/t.IMGFMT.base_new', fmt=IMGFMT size=393216 + +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 262144 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 262144, "length": 131072, "depth": 0, "zero": true, "data": false}] + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ 
"start": 262144, "length": 65536, "depth": 0, "zero": true, "data": false}, +{ "start": 327680, "length": 65536, "depth": 1, "zero": true, "data": false}] +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 7ac9a5ea4a..00e474ab0a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -249,3 +249,4 @@ 247 rw auto quick 248 rw auto quick 249 rw auto quick +252 rw auto backing quick diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 97ac0b159d..036ed9a3b3 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -354,6 +354,111 @@ static void test_sync_op(const void *opaque) blk_unref(blk); } +typedef struct TestBlockJob { + BlockJob common; + bool should_complete; + int n; +} TestBlockJob; + +static int test_job_prepare(Job *job) +{ + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + return 0; +} + +static int coroutine_fn test_job_run(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + + job_transition_to_ready(&s->common.job); + while (!s->should_complete) { + s->n++; + g_assert(qemu_get_current_aio_context() == job->aio_context); + + /* Avoid job_sleep_ns() because it marks the job as !busy. We want to + * emulate some actual activity (probably some I/O) here so that the + * drain involved in AioContext switches has to wait for this activity + * to stop. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); + + job_pause_point(&s->common.job); + } + + g_assert(qemu_get_current_aio_context() == job->aio_context); + return 0; +} + +static void test_job_complete(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + s->should_complete = true; +} + +BlockJobDriver test_job_driver = { + .job_driver = { + .instance_size = sizeof(TestBlockJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_job_run, + .complete = test_job_complete, + .prepare = test_job_prepare, + }, +}; + +static void test_attach_blockjob(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs; + TestBlockJob *tjob; + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs, &error_abort); + + tjob = block_job_create("job0", &test_job_driver, NULL, bs, + 0, BLK_PERM_ALL, + 0, 0, NULL, NULL, &error_abort); + job_start(&tjob->common.job); + + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + job_complete_sync(&tjob->common.job, &error_abort); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + bdrv_unref(bs); + blk_unref(blk); +} + int main(int argc, char **argv) { int i; @@ -368,5 +473,7 @@ int main(int argc, char **argv) g_test_add_data_func(t->name, t, test_sync_op); } + g_test_add_func("/attach/blockjob", test_attach_blockjob); + return g_test_run(); } diff --git 
a/util/aio-posix.c b/util/aio-posix.c index 6fbfa7924f..db11021287 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -519,6 +519,10 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) if (!node->deleted && node->io_poll && aio_node_check(ctx, node->is_external) && node->io_poll(node->opaque)) { + /* + * Polling was successful, exit try_poll_mode immediately + * to adjust the next polling time. + */ *timeout = 0; if (node->opaque != &ctx->notifier) { progress = true; @@ -558,8 +562,9 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) do { progress = run_poll_handlers_once(ctx, timeout); elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time; - } while (!progress && elapsed_time < max_ns - && !atomic_read(&ctx->poll_disable_cnt)); + max_ns = qemu_soonest_timeout(*timeout, max_ns); + assert(!(max_ns && progress)); + } while (elapsed_time < max_ns && !atomic_read(&ctx->poll_disable_cnt)); /* If time has passed with no successful polling, adjust *timeout to * keep the same ending time. @@ -585,8 +590,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) */ static bool try_poll_mode(AioContext *ctx, int64_t *timeout) { - /* See qemu_soonest_timeout() uint64_t hack */ - int64_t max_ns = MIN((uint64_t)*timeout, (uint64_t)ctx->poll_ns); + int64_t max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns); if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) { poll_set_started(ctx, true); diff --git a/util/readline.c b/util/readline.c index ec91ee0fea..a7672b51c1 100644 --- a/util/readline.c +++ b/util/readline.c @@ -48,14 +48,15 @@ static void readline_update(ReadLineState *rs) if (rs->cmd_buf_size != rs->last_cmd_buf_size || memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) { - for(i = 0; i < rs->last_cmd_buf_index; i++) { + for (i = 0; i < rs->last_cmd_buf_index; i++) { rs->printf_func(rs->opaque, "\033[D"); } rs->cmd_buf[rs->cmd_buf_size] = '\0'; if (rs->read_password) { len = strlen(rs->cmd_buf); - for(i = 0; i < len; i++) + for (i = 0; i < len; i++) { rs->printf_func(rs->opaque, "*"); + } } else { rs->printf_func(rs->opaque, "%s", rs->cmd_buf); } @@ -67,12 +68,12 @@ static void readline_update(ReadLineState *rs) if (rs->cmd_buf_index != rs->last_cmd_buf_index) { delta = rs->cmd_buf_index - rs->last_cmd_buf_index; if (delta > 0) { - for(i = 0;i < delta; i++) { + for (i = 0; i < delta; i++) { rs->printf_func(rs->opaque, "\033[C"); } } else { delta = -delta; - for(i = 0;i < delta; i++) { + for (i = 0; i < delta; i++) { rs->printf_func(rs->opaque, "\033[D"); } } @@ -178,35 +179,38 @@ static void readline_up_char(ReadLineState *rs) { int idx; - if (rs->hist_entry == 0) - return; + if (rs->hist_entry == 0) { + return; + } if (rs->hist_entry == -1) { - /* Find latest entry */ - for (idx = 0; idx < READLINE_MAX_CMDS; idx++) { - if (rs->history[idx] == NULL) - break; - } - rs->hist_entry = idx; + /* Find latest entry */ + for (idx = 0; idx < READLINE_MAX_CMDS; idx++) { + if (rs->history[idx] == NULL) { + break; + } + } + rs->hist_entry = idx; } rs->hist_entry--; if (rs->hist_entry >= 0) { - pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), + pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), rs->history[rs->hist_entry]); - rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf); + rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf); } } static void readline_down_char(ReadLineState *rs) { - if (rs->hist_entry == -1) + if (rs->hist_entry == -1) { return; + } if (rs->hist_entry < READLINE_MAX_CMDS - 1 && 
rs->history[++rs->hist_entry] != NULL) { - pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), + pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), rs->history[rs->hist_entry]); } else { rs->cmd_buf[0] = 0; - rs->hist_entry = -1; + rs->hist_entry = -1; } rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf); } @@ -216,46 +220,50 @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline) char *hist_entry, *new_entry; int idx; - if (cmdline[0] == '\0') - return; + if (cmdline[0] == '\0') { + return; + } new_entry = NULL; if (rs->hist_entry != -1) { - /* We were editing an existing history entry: replace it */ - hist_entry = rs->history[rs->hist_entry]; - idx = rs->hist_entry; - if (strcmp(hist_entry, cmdline) == 0) { - goto same_entry; - } + /* We were editing an existing history entry: replace it */ + hist_entry = rs->history[rs->hist_entry]; + idx = rs->hist_entry; + if (strcmp(hist_entry, cmdline) == 0) { + goto same_entry; + } } /* Search cmdline in history buffers */ for (idx = 0; idx < READLINE_MAX_CMDS; idx++) { - hist_entry = rs->history[idx]; - if (hist_entry == NULL) - break; - if (strcmp(hist_entry, cmdline) == 0) { - same_entry: - new_entry = hist_entry; - /* Put this entry at the end of history */ - memmove(&rs->history[idx], &rs->history[idx + 1], - (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *)); - rs->history[READLINE_MAX_CMDS - 1] = NULL; - for (; idx < READLINE_MAX_CMDS; idx++) { - if (rs->history[idx] == NULL) - break; - } - break; - } + hist_entry = rs->history[idx]; + if (hist_entry == NULL) { + break; + } + if (strcmp(hist_entry, cmdline) == 0) { + same_entry: + new_entry = hist_entry; + /* Put this entry at the end of history */ + memmove(&rs->history[idx], &rs->history[idx + 1], + (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *)); + rs->history[READLINE_MAX_CMDS - 1] = NULL; + for (; idx < READLINE_MAX_CMDS; idx++) { + if (rs->history[idx] == NULL) { + break; + } + } + break; + } } if (idx == READLINE_MAX_CMDS) { - /* Need to get one free slot */ + /* Need to get one free slot */ g_free(rs->history[0]); - memmove(rs->history, &rs->history[1], - (READLINE_MAX_CMDS - 1) * sizeof(char *)); - rs->history[READLINE_MAX_CMDS - 1] = NULL; - idx = READLINE_MAX_CMDS - 1; + memmove(rs->history, &rs->history[1], + (READLINE_MAX_CMDS - 1) * sizeof(char *)); + rs->history[READLINE_MAX_CMDS - 1] = NULL; + idx = READLINE_MAX_CMDS - 1; } - if (new_entry == NULL) + if (new_entry == NULL) { new_entry = g_strdup(cmdline); + } rs->history[idx] = new_entry; rs->hist_entry = -1; } @@ -297,49 +305,55 @@ static void readline_completion(ReadLineState *rs) g_free(cmdline); /* no completion found */ - if (rs->nb_completions <= 0) + if (rs->nb_completions <= 0) { return; + } if (rs->nb_completions == 1) { len = strlen(rs->completions[0]); - for(i = rs->completion_index; i < len; i++) { + for (i = rs->completion_index; i < len; i++) { readline_insert_char(rs, rs->completions[0][i]); } /* extra space for next argument. 
XXX: make it more generic */ - if (len > 0 && rs->completions[0][len - 1] != '/') + if (len > 0 && rs->completions[0][len - 1] != '/') { readline_insert_char(rs, ' '); + } } else { qsort(rs->completions, rs->nb_completions, sizeof(char *), completion_comp); rs->printf_func(rs->opaque, "\n"); max_width = 0; - max_prefix = 0; - for(i = 0; i < rs->nb_completions; i++) { + max_prefix = 0; + for (i = 0; i < rs->nb_completions; i++) { len = strlen(rs->completions[i]); - if (i==0) { + if (i == 0) { max_prefix = len; } else { - if (len < max_prefix) + if (len < max_prefix) { max_prefix = len; - for(j=0; j<max_prefix; j++) { - if (rs->completions[i][j] != rs->completions[0][j]) + } + for (j = 0; j < max_prefix; j++) { + if (rs->completions[i][j] != rs->completions[0][j]) { max_prefix = j; + } } } - if (len > max_width) + if (len > max_width) { max_width = len; + } } - if (max_prefix > 0) - for(i = rs->completion_index; i < max_prefix; i++) { + if (max_prefix > 0) + for (i = rs->completion_index; i < max_prefix; i++) { readline_insert_char(rs, rs->completions[0][i]); } max_width += 2; - if (max_width < 10) + if (max_width < 10) { max_width = 10; - else if (max_width > 80) + } else if (max_width > 80) { max_width = 80; + } nb_cols = 80 / max_width; j = 0; - for(i = 0; i < rs->nb_completions; i++) { + for (i = 0; i < rs->nb_completions; i++) { rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]); if (++j == nb_cols || i == (rs->nb_completions - 1)) { rs->printf_func(rs->opaque, "\n"); @@ -362,9 +376,9 @@ static void readline_clear_screen(ReadLineState *rs) /* return true if command handled */ void readline_handle_byte(ReadLineState *rs, int ch) { - switch(rs->esc_state) { + switch (rs->esc_state) { case IS_NORM: - switch(ch) { + switch (ch) { case 1: readline_bol(rs); break; @@ -383,8 +397,9 @@ void readline_handle_byte(ReadLineState *rs, int ch) case 10: case 13: rs->cmd_buf[rs->cmd_buf_size] = '\0'; - if (!rs->read_password) + if (!rs->read_password) { readline_hist_add(rs, rs->cmd_buf); + } rs->printf_func(rs->opaque, "\n"); rs->cmd_buf_index = 0; rs->cmd_buf_size = 0; @@ -403,9 +418,9 @@ void readline_handle_byte(ReadLineState *rs, int ch) case 8: readline_backspace(rs); break; - case 155: + case 155: rs->esc_state = IS_CSI; - break; + break; default: if (ch >= 32) { readline_insert_char(rs, ch); @@ -425,15 +440,15 @@ void readline_handle_byte(ReadLineState *rs, int ch) } break; case IS_CSI: - switch(ch) { - case 'A': - case 'F': - readline_up_char(rs); - break; - case 'B': - case 'E': - readline_down_char(rs); - break; + switch (ch) { + case 'A': + case 'F': + readline_up_char(rs); + break; + case 'B': + case 'E': + readline_down_char(rs); + break; case 'D': readline_backward_char(rs); break; @@ -444,7 +459,7 @@ void readline_handle_byte(ReadLineState *rs, int ch) rs->esc_param = rs->esc_param * 10 + (ch - '0'); goto the_end; case '~': - switch(rs->esc_param) { + switch (rs->esc_param) { case 1: readline_bol(rs); break; @@ -463,7 +478,7 @@ void readline_handle_byte(ReadLineState *rs, int ch) the_end: break; case IS_SS3: - switch(ch) { + switch (ch) { case 'F': readline_eol(rs); break; @@ -495,8 +510,9 @@ void readline_restart(ReadLineState *rs) const char *readline_get_history(ReadLineState *rs, unsigned int index) { - if (index >= READLINE_MAX_CMDS) + if (index >= READLINE_MAX_CMDS) { return NULL; + } return rs->history[index]; } |
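The linux-user/uname.c hunk above replaces an strncpy()-based copy with a size-bounded memcpy() plus an unconditional NUL terminator. As a rough standalone sketch of that idiom (plain C outside the QEMU tree; the names COPY_FIELD, src, and dest are illustrative, not QEMU's):

#include <stdio.h>
#include <string.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Copy at most the capacity of the smaller buffer and always
 * NUL-terminate the destination. Both macro arguments must be
 * real arrays, not pointers, so that sizeof yields the capacity.
 */
#define COPY_FIELD(dest, src)                                     \
    do {                                                          \
        memcpy((dest), (src), MIN(sizeof(src), sizeof(dest)));    \
        (dest)[sizeof(dest) - 1] = '\0';                          \
    } while (0)

int main(void)
{
    char src[65] = "Linux";   /* remainder of the array is zero-filled */
    char dest[65];

    COPY_FIELD(dest, src);
    printf("%s\n", dest);     /* prints "Linux", always terminated */
    return 0;
}

Unlike strncpy() with a fixed __NEW_UTS_LEN bound, this cannot overrun a destination field smaller than the constant, and the terminator is written even when the source fills the buffer completely.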