6 files changed, 139 insertions, 215 deletions
diff --git a/Makefile b/Makefile
index 8e2fc6624c..cfb18f1525 100644
--- a/Makefile
+++ b/Makefile
@@ -731,6 +731,7 @@ distclean: clean
 	rm -rf .doctrees
 	$(call clean-manual,devel)
 	$(call clean-manual,interop)
+	$(call clean-manual,specs)
 	for d in $(TARGET_DIRS); do \
 	rm -rf $$d || exit 1 ; \
         done
@@ -781,6 +782,7 @@ endef
 .PHONY: install-sphinxdocs
 install-sphinxdocs: sphinxdocs
 	$(call install-manual,interop)
+	$(call install-manual,specs)
 
 install-doc: $(DOCS) install-sphinxdocs
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
@@ -962,7 +964,7 @@ docs/version.texi: $(SRC_PATH)/VERSION config-host.mak
 # and handles "don't rebuild things unless necessary" itself.
 # The '.doctrees' files are cached information to speed this up.
 .PHONY: sphinxdocs
-sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html $(MANUAL_BUILDDIR)/interop/index.html
+sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html $(MANUAL_BUILDDIR)/interop/index.html $(MANUAL_BUILDDIR)/specs/index.html
 
 # Canned command to build a single manual
 build-manual = $(call quiet-command,sphinx-build $(if $(V),,-q) -W -n -b html -D version=$(VERSION) -D release="$(FULL_VERSION)" -d .doctrees/$1 $(SRC_PATH)/docs/$1 $(MANUAL_BUILDDIR)/$1 ,"SPHINX","$(MANUAL_BUILDDIR)/$1")
@@ -975,6 +977,9 @@ $(MANUAL_BUILDDIR)/devel/index.html: $(call manual-deps,devel)
 $(MANUAL_BUILDDIR)/interop/index.html: $(call manual-deps,interop)
 	$(call build-manual,interop)
 
+$(MANUAL_BUILDDIR)/specs/index.html: $(call manual-deps,specs)
+	$(call build-manual,specs)
+
 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
 
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 2a4ddf40ad..1ec61fcfed 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -21,3 +21,4 @@ Contents:
    testing
    decodetree
    secure-coding-practices
+   tcg
diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst
new file mode 100644
index 0000000000..4956a30a4e
--- /dev/null
+++ b/docs/devel/tcg.rst
@@ -0,0 +1,111 @@
+====================
+Translator Internals
+====================
+
+QEMU is a dynamic translator. When it first encounters a piece of code,
+it converts it to the host instruction set. Usually dynamic translators
+are very complicated and highly CPU dependent. QEMU uses some tricks
+which make it relatively easily portable and simple while achieving good
+performance.
+
+QEMU's dynamic translation backend is called TCG, for "Tiny Code
+Generator". For more information, please take a look at ``tcg/README``.
+
+Some notable features of QEMU's dynamic translator are:
+
+CPU state optimisations
+-----------------------
+
+The target CPUs have many internal states which change the way they
+evaluate instructions. In order to achieve good speed, the
+translation phase assumes that some state information of the virtual
+CPU cannot change within a block. The state is recorded in the Translation
+Block (TB). If the state changes (e.g. privilege level), a new TB will
+be generated and the previous TB won't be used anymore until the state
+matches the state recorded in the previous TB. The same idea can be applied
+to other aspects of the CPU state.  For example, on x86, if the SS,
+DS and ES segments have a zero base, then the translator does not even
+generate an addition for the segment base.
+
+Direct block chaining
+---------------------
+
+After each translated basic block is executed, QEMU uses the simulated
+Program Counter (PC) and other CPU state information (such as the CS
+segment base value) to find the next basic block.
+
+In order to accelerate the most common cases where the new simulated PC
+is known, QEMU can patch a basic block so that it jumps directly to the
+next one.
+
+The most portable code uses an indirect jump. An indirect jump makes
+it easier to make the jump target modification atomic. On some host
+architectures (such as x86 or PowerPC), the ``JUMP`` opcode is
+directly patched so that the block chaining has no overhead.
+
+Self-modifying code and translated code invalidation
+----------------------------------------------------
+
+Self-modifying code is a special challenge in x86 emulation because no
+instruction cache invalidation is signaled by the application when code
+is modified.
+
+User-mode emulation marks a host page as write-protected (if it is
+not already read-only) every time translated code is generated for a
+basic block.  Then, if a write access is done to the page, Linux raises
+a SIGSEGV signal. QEMU then invalidates all the translated code in the page
+and enables write accesses to the page.  For system emulation, write
+protection is achieved through the software MMU.
+
+Correct translated code invalidation is done efficiently by maintaining
+a linked list of every translated block contained in a given page. Other
+linked lists are also maintained to undo direct block chaining.
+
+On RISC targets, correctly written software uses memory barriers and
+cache flushes, so some of the protection above would not be
+necessary. However, QEMU still requires that the generated code always
+matches the target instructions in memory in order to handle
+exceptions correctly.
+
+Exception support
+-----------------
+
+longjmp() is used when an exception such as division by zero is
+encountered.
+
+The host SIGSEGV and SIGBUS signal handlers are used to catch invalid
+memory accesses.  QEMU keeps a map from host program counter to
+target program counter, and looks up where the exception happened
+based on the host program counter at the exception point.
+
+On some targets, some bits of the virtual CPU's state are not flushed to the
+memory until the end of the translation block.  This is done for internal
+emulation state that is rarely accessed directly by the program and/or changes
+very often throughout the execution of a translation block---this includes
+condition codes on x86, delay slots on SPARC, conditional execution on
+ARM, and so on.  This state is stored for each target instruction, and
+looked up on exceptions.
+
+MMU emulation
+-------------
+
+For system emulation QEMU uses a software MMU. In that mode, the MMU
+virtual to physical address translation is done at every memory
+access.
+
+QEMU uses an address translation cache (TLB) to speed up the translation.
+In order to avoid flushing the translated code each time the MMU
+mappings change, all caches in QEMU are physically indexed.  This
+means that each basic block is indexed with its physical address.
+
+In order to avoid invalidating the basic block chain when MMU mappings
+change, chaining is only performed when the destination of the jump
+shares a page with the basic block that is performing the jump.
+
+The MMU can also distinguish RAM and ROM memory areas from MMIO memory
+areas.  Access is faster for RAM and ROM because the translation cache also
+hosts the offset between guest address and host memory.  Accessing MMIO
+memory areas instead calls out to C code for device emulation.
+Finally, the MMU helps track dirty pages and pages pointed to by
+translation blocks.
+
diff --git a/docs/specs/conf.py b/docs/specs/conf.py
new file mode 100644
index 0000000000..4d56f3ae13
--- /dev/null
+++ b/docs/specs/conf.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+#
+# QEMU documentation build configuration file for the 'specs' manual.
+#
+# This includes the top level conf file and then makes any necessary tweaks.
+import sys
+import os
+
+qemu_docdir = os.path.abspath("..")
+parent_config = os.path.join(qemu_docdir, "conf.py")
+exec(compile(open(parent_config, "rb").read(), parent_config, 'exec'))
+
+# This slightly misuses the 'description', but is the best way to get
+# the manual title to appear in the sidebar.
+html_theme_options['description'] = \
+    u'System Emulation Guest Hardware Specifications'
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 2e927519c2..40adb97c5e 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -1,8 +1,8 @@
-. This is the top level page for the 'specs' manual
+.. This is the top level page for the 'specs' manual
 
 
-QEMU full-system emulation guest hardware specifications
-========================================================
+QEMU System Emulation Guest Hardware Specifications
+===================================================
 
 
 Contents:
@@ -10,4 +10,5 @@ Contents:
 .. toctree::
    :maxdepth: 2
 
-   xive
+   ppc-xive
+   ppc-spapr-xive
diff --git a/qemu-tech.texi b/qemu-tech.texi
index 7c3d1f05e1..3451cfaa5b 100644
--- a/qemu-tech.texi
+++ b/qemu-tech.texi
@@ -161,160 +161,6 @@ may be created from overlay with minimal amount of hand-written code.
 
 @end itemize
 
-@node Translator Internals
-@section Translator Internals
-
-QEMU is a dynamic translator. When it first encounters a piece of code,
-it converts it to the host instruction set. Usually dynamic translators
-are very complicated and highly CPU dependent. QEMU uses some tricks
-which make it relatively easily portable and simple while achieving good
-performances.
-
-QEMU's dynamic translation backend is called TCG, for "Tiny Code
-Generator". For more information, please take a look at @code{tcg/README}.
-
-Some notable features of QEMU's dynamic translator are:
-
-@table @strong
-
-@item CPU state optimisations:
-The target CPUs have many internal states which change the way it
-evaluates instructions. In order to achieve a good speed, the
-translation phase considers that some state information of the virtual
-CPU cannot change in it. The state is recorded in the Translation
-Block (TB). If the state changes (e.g. privilege level), a new TB will
-be generated and the previous TB won't be used anymore until the state
-matches the state recorded in the previous TB. The same idea can be applied
-to other aspects of the CPU state.  For example, on x86, if the SS,
-DS and ES segments have a zero base, then the translator does not even
-generate an addition for the segment base.
-
-@item Direct block chaining:
-After each translated basic block is executed, QEMU uses the simulated
-Program Counter (PC) and other cpu state information (such as the CS
-segment base value) to find the next basic block.
-
-In order to accelerate the most common cases where the new simulated PC
-is known, QEMU can patch a basic block so that it jumps directly to the
-next one.
-
-The most portable code uses an indirect jump. An indirect jump makes
-it easier to make the jump target modification atomic. On some host
-architectures (such as x86 or PowerPC), the @code{JUMP} opcode is
-directly patched so that the block chaining has no overhead.
-
-@item Self-modifying code and translated code invalidation:
-Self-modifying code is a special challenge in x86 emulation because no
-instruction cache invalidation is signaled by the application when code
-is modified.
-
-User-mode emulation marks a host page as write-protected (if it is
-not already read-only) every time translated code is generated for a
-basic block.  Then, if a write access is done to the page, Linux raises
-a SEGV signal. QEMU then invalidates all the translated code in the page
-and enables write accesses to the page.  For system emulation, write
-protection is achieved through the software MMU.
-
-Correct translated code invalidation is done efficiently by maintaining
-a linked list of every translated block contained in a given page. Other
-linked lists are also maintained to undo direct block chaining.
-
-On RISC targets, correctly written software uses memory barriers and
-cache flushes, so some of the protection above would not be
-necessary. However, QEMU still requires that the generated code always
-matches the target instructions in memory in order to handle
-exceptions correctly.
-
-@item Exception support:
-longjmp() is used when an exception such as division by zero is
-encountered.
-
-The host SIGSEGV and SIGBUS signal handlers are used to get invalid
-memory accesses.  QEMU keeps a map from host program counter to
-target program counter, and looks up where the exception happened
-based on the host program counter at the exception point.
-
-On some targets, some bits of the virtual CPU's state are not flushed to the
-memory until the end of the translation block.  This is done for internal
-emulation state that is rarely accessed directly by the program and/or changes
-very often throughout the execution of a translation block---this includes
-condition codes on x86, delay slots on SPARC, conditional execution on
-ARM, and so on.  This state is stored for each target instruction, and
-looked up on exceptions.
-
-@item MMU emulation:
-For system emulation QEMU uses a software MMU. In that mode, the MMU
-virtual to physical address translation is done at every memory
-access.
-
-QEMU uses an address translation cache (TLB) to speed up the translation.
-In order to avoid flushing the translated code each time the MMU
-mappings change, all caches in QEMU are physically indexed.  This
-means that each basic block is indexed with its physical address.
-
-In order to avoid invalidating the basic block chain when MMU mappings
-change, chaining is only performed when the destination of the jump
-shares a page with the basic block that is performing the jump.
-
-The MMU can also distinguish RAM and ROM memory areas from MMIO memory
-areas.  Access is faster for RAM and ROM because the translation cache also
-hosts the offset between guest address and host memory.  Accessing MMIO
-memory areas instead calls out to C code for device emulation.
-Finally, the MMU helps tracking dirty pages and pages pointed to by
-translation blocks.
-@end table
-
-@node QEMU compared to other emulators
-@section QEMU compared to other emulators
-
-Like bochs [1], QEMU emulates an x86 CPU. But QEMU is much faster than
-bochs as it uses dynamic compilation. Bochs is closely tied to x86 PC
-emulation while QEMU can emulate several processors.
-
-Like Valgrind [2], QEMU does user space emulation and dynamic
-translation. Valgrind is mainly a memory debugger while QEMU has no
-support for it (QEMU could be used to detect out of bound memory
-accesses as Valgrind, but it has no support to track uninitialised data
-as Valgrind does). The Valgrind dynamic translator generates better code
-than QEMU (in particular it does register allocation) but it is closely
-tied to an x86 host and target and has no support for precise exceptions
-and system emulation.
-
-EM86 [3] is the closest project to user space QEMU (and QEMU still uses
-some of its code, in particular the ELF file loader). EM86 was limited
-to an alpha host and used a proprietary and slow interpreter (the
-interpreter part of the FX!32 Digital Win32 code translator [4]).
-
-TWIN from Willows Software was a Windows API emulator like Wine. It is less
-accurate than Wine but includes a protected mode x86 interpreter to launch
-x86 Windows executables. Such an approach has greater potential because most
-of the Windows API is executed natively but it is far more difficult to
-develop because all the data structures and function parameters exchanged
-between the API and the x86 code must be converted.
-
-User mode Linux [5] was the only solution before QEMU to launch a
-Linux kernel as a process while not needing any host kernel
-patches. However, user mode Linux requires heavy kernel patches while
-QEMU accepts unpatched Linux kernels. The price to pay is that QEMU is
-slower.
-
-The Plex86 [6] PC virtualizer is done in the same spirit as the now
-obsolete qemu-fast system emulator. It requires a patched Linux kernel
-to work (you cannot launch the same kernel on your PC), but the
-patches are really small. As it is a PC virtualizer (no emulation is
-done except for some privileged instructions), it has the potential of
-being faster than QEMU. The downside is that a complicated (and
-potentially unsafe) host kernel patch is needed.
-
-The commercial PC Virtualizers (VMWare [7], VirtualPC [8]) are faster
-than QEMU (without virtualization), but they all need specific, proprietary
-and potentially unsafe host drivers. Moreover, they are unable to
-provide cycle exact simulation as an emulator can.
-
-VirtualBox [9], Xen [10] and KVM [11] are based on QEMU. QEMU-SystemC
-[12] uses QEMU to simulate a system where some hardware devices are
-developed in SystemC.
-
 @node Managed start up options
 @section Managed start up options
 
@@ -350,59 +196,3 @@ depend on an initialized machine, including but not limited to:
 @item query-status
 @item x-exit-preconfig
 @end table
-
-@node Bibliography
-@section Bibliography
-
-@table @asis
-
-@item [1]
-@url{http://bochs.sourceforge.net/}, the Bochs IA-32 Emulator Project,
-by Kevin Lawton et al.
-
-@item [2]
-@url{http://www.valgrind.org/}, Valgrind, an open-source memory debugger
-for GNU/Linux.
-
-@item [3]
-@url{http://ftp.dreamtime.org/pub/linux/Linux-Alpha/em86/v0.2/docs/em86.html},
-the EM86 x86 emulator on Alpha-Linux.
-
-@item [4]
-@url{http://www.usenix.org/publications/library/proceedings/usenix-nt97/@/full_papers/chernoff/chernoff.pdf},
-DIGITAL FX!32: Running 32-Bit x86 Applications on Alpha NT, by Anton
-Chernoff and Ray Hookway.
-
-@item [5]
-@url{http://user-mode-linux.sourceforge.net/},
-The User-mode Linux Kernel.
-
-@item [6]
-@url{http://www.plex86.org/},
-The new Plex86 project.
-
-@item [7]
-@url{http://www.vmware.com/},
-The VMWare PC virtualizer.
-
-@item [8]
-@url{https://www.microsoft.com/download/details.aspx?id=3702},
-The VirtualPC PC virtualizer.
-
-@item [9]
-@url{http://virtualbox.org/},
-The VirtualBox PC virtualizer.
-
-@item [10]
-@url{http://www.xen.org/},
-The Xen hypervisor.
-
-@item [11]
-@url{http://www.linux-kvm.org/},
-Kernel Based Virtual Machine (KVM).
-
-@item [12]
-@url{http://www.greensocs.com/projects/QEMUSystemC},
-QEMU-SystemC, a hardware co-simulator.
-
-@end table