From dab32b321f4d510ed5171b12f68bd5aa7a02cffe Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 15 Jul 2016 14:57:26 +0100
Subject: linux-user: Fix handling of iovec counts

In the kernel the length of an iovec is generally handled as
an unsigned long, not an integer; fix the parameter to
lock_iovec() accordingly.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ca06943f3b..71f40e3ab8 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -3119,7 +3119,7 @@ static abi_long do_getsockopt(int sockfd, int level, int optname,
 }
 
 static struct iovec *lock_iovec(int type, abi_ulong target_addr,
-                                int count, int copy)
+                                abi_ulong count, int copy)
 {
     struct target_iovec *target_vec;
     struct iovec *vec;
@@ -3132,7 +3132,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr,
         errno = 0;
         return NULL;
     }
-    if (count < 0 || count > IOV_MAX) {
+    if (count > IOV_MAX) {
         errno = EINVAL;
         return NULL;
     }
@@ -3207,7 +3207,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr,
 }
 
 static void unlock_iovec(struct iovec *vec, abi_ulong target_addr,
-                         int count, int copy)
+                         abi_ulong count, int copy)
 {
     struct target_iovec *target_vec;
     int i;
@@ -3462,7 +3462,7 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
 {
     abi_long ret, len;
     struct msghdr msg;
-    int count;
+    abi_ulong count;
     struct iovec *vec;
     abi_ulong target_vec;
 
-- 
cgit v1.2.3


From 97b079703350ec0f6625788fb380f1fa14d0e2c4 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 15 Jul 2016 14:57:27 +0100
Subject: linux-user: Fix errno for sendrecvmsg with large iovec length

The sendmsg and recvmsg syscalls use a different errno to indicate
an overlarge iovec length from readv and writev. Handle this
special case in do_sendrcvmsg_locked() to avoid getting the
default errno returned by lock_iovec().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 71f40e3ab8..9d18326467 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -3485,6 +3485,15 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
 
     count = tswapal(msgp->msg_iovlen);
     target_vec = tswapal(msgp->msg_iov);
+
+    if (count > IOV_MAX) {
+        /* sendrcvmsg returns a different errno for this condition than
+         * readv/writev, so we must catch it here before lock_iovec() does.
+         */
+        ret = -TARGET_EMSGSIZE;
+        goto out2;
+    }
+
     vec = lock_iovec(send ? VERIFY_READ : VERIFY_WRITE,
                      target_vec, count, send);
     if (vec == NULL) {
-- 
cgit v1.2.3


From 26a6fc96e0ca7522b855c2164bc6098240c286f6 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 15 Jul 2016 14:57:28 +0100
Subject: linux-user: Allow bad msg_name for recvfrom on connected socket

The POSIX standard mandates that for a connected socket recvfrom()
must ignore the msg_name and msg_namelen fields. This is awkward
for QEMU because we will attempt to copy them from guest address
space. Handle this by not immediately returning a TARGET_EFAULT
if the copy failed, but instead passing a known-bad address
to the host kernel, which can then return EFAULT or ignore the
value appropriately.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9d18326467..51f558d47d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -3472,7 +3472,14 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
         ret = target_to_host_sockaddr(fd, msg.msg_name,
                                       tswapal(msgp->msg_name),
                                       msg.msg_namelen);
-        if (ret) {
+        if (ret == -TARGET_EFAULT) {
+            /* For connected sockets msg_name and msg_namelen must
+             * be ignored, so returning EFAULT immediately is wrong.
+             * Instead, pass a bad msg_name to the host kernel, and
+             * let it decide whether to return EFAULT or not.
+             */
+            msg.msg_name = (void *)-1;
+        } else if (ret) {
             goto out2;
         }
     } else {
@@ -3534,7 +3541,7 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
             }
             if (!is_error(ret)) {
                 msgp->msg_namelen = tswap32(msg.msg_namelen);
-                if (msg.msg_name != NULL) {
+                if (msg.msg_name != NULL && msg.msg_name != (void *)-1) {
                     ret = host_to_target_sockaddr(tswapal(msgp->msg_name),
                                     msg.msg_name, msg.msg_namelen);
                     if (ret) {
-- 
cgit v1.2.3


From 700fa58e4b9100d6bd77df06d2e5d1f457720c4d Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 18 Jul 2016 11:47:55 +0100
Subject: linux-user: Use direct syscall for utimensat

The linux utimensat syscall differs in semantics from the
libc function because the syscall combines the features
of utimensat() and futimens(). Rather than trying to
split these apart in order to call the two libc functions
which then call the same underlying syscall, just always
directly make the host syscall. This fixes bugs in some
of the corner cases which should return errors from the
syscall but which we were incorrectly directing to futimens().

This doesn't reduce the set of hosts that our syscall
implementation will work on, because if the direct syscall
fails ENOSYS then the libc functions would also fail ENOSYS.
(The system call has been in the kernel since 2.6.22 anyway.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 51f558d47d..21ae996dd1 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -520,16 +520,7 @@ static int sys_getcwd1(char *buf, size_t size)
 }
 
 #ifdef TARGET_NR_utimensat
-#ifdef CONFIG_UTIMENSAT
-static int sys_utimensat(int dirfd, const char *pathname,
-    const struct timespec times[2], int flags)
-{
-    if (pathname == NULL)
-        return futimens(dirfd, times);
-    else
-        return utimensat(dirfd, pathname, times, flags);
-}
-#elif defined(__NR_utimensat)
+#if defined(__NR_utimensat)
 #define __NR_sys_utimensat __NR_utimensat
 _syscall4(int,sys_utimensat,int,dirfd,const char *,pathname,
           const struct timespec *,tsp,int,flags)
-- 
cgit v1.2.3


From 2ba7fae3bd688f5bb6cb08defc731d77e6bd943c Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 18 Jul 2016 15:35:59 +0100
Subject: linux-user: Check for bad event numbers in epoll_wait

The kernel checks that the maxevents parameter to epoll_wait
is non-negative and not larger than EP_MAX_EVENTS. Add this
check to our implementation, so that:
 * we fail these cases EINVAL rather than EFAULT
 * we don't pass negative or overflowing values to the
   lock_user() size calculation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 21ae996dd1..eecccbb25c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -11501,6 +11501,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         int maxevents = arg3;
         int timeout = arg4;
 
+        if (maxevents <= 0 || maxevents > TARGET_EP_MAX_EVENTS) {
+            ret = -TARGET_EINVAL;
+            break;
+        }
+
         target_ep = lock_user(VERIFY_WRITE, arg2,
                               maxevents * sizeof(struct target_epoll_event), 1);
         if (!target_ep) {
-- 
cgit v1.2.3


From ce9c139d93db03e464341385976606b7568b768f Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 18 Jul 2016 16:30:36 +0100
Subject: linux-user: Range check the nfds argument to ppoll syscall

Do an initial range check on the ppoll syscall's nfds argument,
to avoid possible overflow in the calculation of the lock_user()
size argument. The host kernel will later apply the rather lower
limit based on RLIMIT_NOFILE as appropriate.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index eecccbb25c..7a50a57d4b 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9661,6 +9661,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             pfd = NULL;
             target_pfd = NULL;
             if (nfds) {
+                if (nfds > (INT_MAX / sizeof(struct target_pollfd))) {
+                    ret = -TARGET_EINVAL;
+                    break;
+                }
+
                 target_pfd = lock_user(VERIFY_WRITE, arg1,
                                        sizeof(struct target_pollfd) * nfds, 1);
                 if (!target_pfd) {
-- 
cgit v1.2.3


From 3211215e741f6e4824ddfc4919428e8d1b82a3c2 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 12 Jul 2016 13:02:13 +0100
Subject: linux-user: Check lock_user() return value for NULL

lock_user() can return NULL, which typically means the syscall
should fail with EFAULT. Add checks in various places where
Coverity spotted that we were missing them.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 7a50a57d4b..efcc17a3b0 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -5008,6 +5008,11 @@ static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd,
     host_data = (char*)host_dm + host_dm->data_start;
 
     argptr = lock_user(VERIFY_READ, guest_data, guest_data_size, 1);
+    if (!argptr) {
+        ret = -TARGET_EFAULT;
+        goto out;
+    }
+
     switch (ie->host_cmd) {
     case DM_REMOVE_ALL:
     case DM_LIST_DEVICES:
@@ -11271,6 +11276,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 
     case TARGET_NR_mq_unlink:
         p = lock_user_string(arg1 - 1);
+        if (!p) {
+            ret = -TARGET_EFAULT;
+            break;
+        }
         ret = get_errno(mq_unlink(p));
         unlock_user (p, arg1, 0);
         break;
-- 
cgit v1.2.3


From f9757b1d9649cb739ecf544c7137c0885281f6e8 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 12 Jul 2016 13:02:14 +0100
Subject: linux-user: Fix incorrect use of host errno in do_ioctl_dm()

do_ioctl_dm() should return target errno values, not host ones;
correct an accidental use of a host errno in an error path.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index efcc17a3b0..e28690713a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -5001,7 +5001,7 @@ static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd,
 
     guest_data = arg + host_dm->data_start;
     if ((guest_data - arg) < 0) {
-        ret = -EINVAL;
+        ret = -TARGET_EINVAL;
         goto out;
     }
     guest_data_size = host_dm->data_size - host_dm->data_start;
-- 
cgit v1.2.3


From ee8e76141b4dd00f8e97fda274876a17f9a46bbe Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 11 Jul 2016 16:48:11 +0100
Subject: linux-user: Use correct target SHMLBA in shmat()

The shmat() handling needs to do target-specific handling
of the attach address for shmat():
 * if the SHM_RND flag is passed, the address is rounded
   down to a SHMLBA boundary
 * if SHM_RND is not passed, then the call is failed EINVAL
   if the address is not a multiple of SHMLBA

Since SHMLBA is target-specific, we need to do this
checking and rounding in QEMU and can't leave it up to the
host syscall.

Allow targets to define TARGET_FORCE_SHMLBA and provide
a target_shmlba() function if appropriate, and update
do_shmat() to honour them.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index e28690713a..85699f9f31 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -4575,12 +4575,34 @@ static inline abi_long do_shmctl(int shmid, int cmd, abi_long buf)
     return ret;
 }
 
-static inline abi_ulong do_shmat(int shmid, abi_ulong shmaddr, int shmflg)
+#ifndef TARGET_FORCE_SHMLBA
+/* For most architectures, SHMLBA is the same as the page size;
+ * some architectures have larger values, in which case they should
+ * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
+ * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
+ * and defining its own value for SHMLBA.
+ *
+ * The kernel also permits SHMLBA to be set by the architecture to a
+ * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
+ * this means that addresses are rounded to the large size if
+ * SHM_RND is set but addresses not aligned to that size are not rejected
+ * as long as they are at least page-aligned. Since the only architecture
+ * which uses this is ia64 this code doesn't provide for that oddity.
+ */
+static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
+{
+    return TARGET_PAGE_SIZE;
+}
+#endif
+
+static inline abi_ulong do_shmat(CPUArchState *cpu_env,
+                                 int shmid, abi_ulong shmaddr, int shmflg)
 {
     abi_long raddr;
     void *host_raddr;
     struct shmid_ds shm_info;
     int i,ret;
+    abi_ulong shmlba;
 
     /* find out the length of the shared memory segment */
     ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
@@ -4589,6 +4611,16 @@ static inline abi_ulong do_shmat(int shmid, abi_ulong shmaddr, int shmflg)
         return ret;
     }
 
+    shmlba = target_shmlba(cpu_env);
+
+    if (shmaddr & (shmlba - 1)) {
+        if (shmflg & SHM_RND) {
+            shmaddr &= ~(shmlba - 1);
+        } else {
+            return -TARGET_EINVAL;
+        }
+    }
+
     mmap_lock();
 
     if (shmaddr)
@@ -4647,7 +4679,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr)
 #ifdef TARGET_NR_ipc
 /* ??? This only works with linear mappings.  */
 /* do_ipc() must return target values and target errnos. */
-static abi_long do_ipc(unsigned int call, abi_long first,
+static abi_long do_ipc(CPUArchState *cpu_env,
+                       unsigned int call, abi_long first,
                        abi_long second, abi_long third,
                        abi_long ptr, abi_long fifth)
 {
@@ -4716,7 +4749,7 @@ static abi_long do_ipc(unsigned int call, abi_long first,
         default:
         {
             abi_ulong raddr;
-            raddr = do_shmat(first, ptr, second);
+            raddr = do_shmat(cpu_env, first, ptr, second);
             if (is_error(raddr))
                 return get_errno(raddr);
             if (put_user_ual(raddr, third))
@@ -9304,8 +9337,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
 #ifdef TARGET_NR_ipc
     case TARGET_NR_ipc:
-	ret = do_ipc(arg1, arg2, arg3, arg4, arg5, arg6);
-	break;
+        ret = do_ipc(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
+        break;
 #endif
 #ifdef TARGET_NR_semget
     case TARGET_NR_semget:
@@ -9354,7 +9387,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_shmat
     case TARGET_NR_shmat:
-        ret = do_shmat(arg1, arg2, arg3);
+        ret = do_shmat(cpu_env, arg1, arg2, arg3);
         break;
 #endif
 #ifdef TARGET_NR_shmdt
-- 
cgit v1.2.3


From 9d2803f720d5b71937c0f564bb2c16d8f5e18c8c Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 28 Jul 2016 16:44:46 +0100
Subject: linux-user: Pass si_type information to queue_signal() explicitly

Instead of assuming in queue_signal() that all callers are passing
a siginfo structure which uses the _sifields._sigfault part of the
union (and thus a si_type of QEMU_SI_FAULT), make callers pass
the si_type they require in as an argument.

[RV adjusted to apply]
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 85699f9f31..27ad6a2a6c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -10577,7 +10577,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                     info.si_code = si_code;
                     info._sifields._sigfault._addr
                         = ((CPUArchState *)cpu_env)->pc;
-                    queue_signal((CPUArchState *)cpu_env, info.si_signo, &info);
+                    queue_signal((CPUArchState *)cpu_env, info.si_signo,
+                                 QEMU_SI_FAULT, &info);
                 }
             }
             break;
@@ -11665,7 +11666,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             info.si_errno = 0;
             info.si_code = TARGET_SEGV_MAPERR;
             info._sifields._sigfault._addr = arg6;
-            queue_signal((CPUArchState *)cpu_env, info.si_signo, &info);
+            queue_signal((CPUArchState *)cpu_env, info.si_signo,
+                         QEMU_SI_FAULT, &info);
             ret = 0xdeadbeef;
 
         }
-- 
cgit v1.2.3


From 7cfbd386b92e93fbfae033b9ac89a20d1fe72573 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 2 Aug 2016 18:41:26 +0100
Subject: linux-user: Remove unnecessary nptl_flags variable from do_fork()

The 'nptl_flags' variable in do_fork() is set to a copy of
'flags', and then the CLONE_NPTL_FLAGS are cleared out of 'flags'.
However the only effect of this is that the later check on
"if (flags & CLONE_PARENT_SETTID)" is never true. Since we
will already have done the setting of parent_tidptr in clone_func()
in the child thread, we don't need to do it again.

Delete the dead if() and the clearing of CLONE_NPTL_FLAGS from
'flags', and then use 'flags' where we were previously using
'nptl_flags', so we can delete the unnecessary variable.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 27ad6a2a6c..3b7b51f6c7 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6011,7 +6011,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
     TaskState *ts;
     CPUState *new_cpu;
     CPUArchState *new_env;
-    unsigned int nptl_flags;
     sigset_t sigmask;
 
     /* Emulate vfork() with fork() */
@@ -6034,15 +6033,14 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         ts->bprm = parent_ts->bprm;
         ts->info = parent_ts->info;
         ts->signal_mask = parent_ts->signal_mask;
-        nptl_flags = flags;
-        flags &= ~CLONE_NPTL_FLAGS2;
 
-        if (nptl_flags & CLONE_CHILD_CLEARTID) {
+        if (flags & CLONE_CHILD_CLEARTID) {
             ts->child_tidptr = child_tidptr;
         }
 
-        if (nptl_flags & CLONE_SETTLS)
+        if (flags & CLONE_SETTLS) {
             cpu_set_tls (new_env, newtls);
+        }
 
         /* Grab a mutex so that thread setup appears atomic.  */
         pthread_mutex_lock(&clone_lock);
@@ -6052,10 +6050,12 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         pthread_mutex_lock(&info.mutex);
         pthread_cond_init(&info.cond, NULL);
         info.env = new_env;
-        if (nptl_flags & CLONE_CHILD_SETTID)
+        if (flags & CLONE_CHILD_SETTID) {
             info.child_tidptr = child_tidptr;
-        if (nptl_flags & CLONE_PARENT_SETTID)
+        }
+        if (flags & CLONE_PARENT_SETTID) {
             info.parent_tidptr = parent_tidptr;
+        }
 
         ret = pthread_attr_init(&attr);
         ret = pthread_attr_setstacksize(&attr, NEW_STACK_SIZE);
@@ -6074,8 +6074,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
             /* Wait for the child to initialize.  */
             pthread_cond_wait(&info.cond, &info.mutex);
             ret = info.tid;
-            if (flags & CLONE_PARENT_SETTID)
-                put_user_u32(ret, parent_tidptr);
         } else {
             ret = -1;
         }
-- 
cgit v1.2.3


From 5ea2fc84da1bffce749c9d0848f5336def2818bb Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 2 Aug 2016 18:41:27 +0100
Subject: linux-user: Sanity check clone flags

We currently make no checks on the flags passed to the clone syscall,
which means we will not fail clone attempts which ask for features
that we can't implement. Add sanity checking of the flags to clone
(which we were already doing in the "this is a fork" path, but not
for the "this is a new thread" path), tidy up the checking in
the fork path to match it, and check that the fork case isn't trying
to specify a custom termination signal.

This is helpful in causing some LTP test cases to fail cleanly
rather than behaving bizarrely when we let the clone succeed
but didn't provide the semantics requested by the flags.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 3 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 3b7b51f6c7..d7e4d9ff2f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -112,8 +112,56 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 
 #include "qemu.h"
 
-#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
-    CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
+#ifndef CLONE_IO
+#define CLONE_IO                0x80000000      /* Clone io context */
+#endif
+
+/* We can't directly call the host clone syscall, because this will
+ * badly confuse libc (breaking mutexes, for example). So we must
+ * divide clone flags into:
+ *  * flag combinations that look like pthread_create()
+ *  * flag combinations that look like fork()
+ *  * flags we can implement within QEMU itself
+ *  * flags we can't support and will return an error for
+ */
+/* For thread creation, all these flags must be present; for
+ * fork, none must be present.
+ */
+#define CLONE_THREAD_FLAGS                              \
+    (CLONE_VM | CLONE_FS | CLONE_FILES |                \
+     CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM)
+
+/* These flags are ignored:
+ * CLONE_DETACHED is now ignored by the kernel;
+ * CLONE_IO is just an optimisation hint to the I/O scheduler
+ */
+#define CLONE_IGNORED_FLAGS                     \
+    (CLONE_DETACHED | CLONE_IO)
+
+/* Flags for fork which we can implement within QEMU itself */
+#define CLONE_OPTIONAL_FORK_FLAGS               \
+    (CLONE_SETTLS | CLONE_PARENT_SETTID |       \
+     CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID)
+
+/* Flags for thread creation which we can implement within QEMU itself */
+#define CLONE_OPTIONAL_THREAD_FLAGS                             \
+    (CLONE_SETTLS | CLONE_PARENT_SETTID |                       \
+     CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID | CLONE_PARENT)
+
+#define CLONE_INVALID_FORK_FLAGS                                        \
+    (~(CSIGNAL | CLONE_OPTIONAL_FORK_FLAGS | CLONE_IGNORED_FLAGS))
+
+#define CLONE_INVALID_THREAD_FLAGS                                      \
+    (~(CSIGNAL | CLONE_THREAD_FLAGS | CLONE_OPTIONAL_THREAD_FLAGS |     \
+       CLONE_IGNORED_FLAGS))
+
+/* CLONE_VFORK is special cased early in do_fork(). The other flag bits
+ * have almost all been allocated. We cannot support any of
+ * CLONE_NEWNS, CLONE_NEWCGROUP, CLONE_NEWUTS, CLONE_NEWIPC,
+ * CLONE_NEWUSER, CLONE_NEWPID, CLONE_NEWNET, CLONE_PTRACE, CLONE_UNTRACED.
+ * The checks against the invalid thread masks above will catch these.
+ * (The one remaining unallocated bit is 0x1000 which used to be CLONE_PID.)
+ */
 
 //#define DEBUG
 /* Define DEBUG_ERESTARTSYS to force every syscall to be restarted
@@ -6013,6 +6061,8 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
     CPUArchState *new_env;
     sigset_t sigmask;
 
+    flags &= ~CLONE_IGNORED_FLAGS;
+
     /* Emulate vfork() with fork() */
     if (flags & CLONE_VFORK)
         flags &= ~(CLONE_VFORK | CLONE_VM);
@@ -6022,6 +6072,11 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         new_thread_info info;
         pthread_attr_t attr;
 
+        if (((flags & CLONE_THREAD_FLAGS) != CLONE_THREAD_FLAGS) ||
+            (flags & CLONE_INVALID_THREAD_FLAGS)) {
+            return -TARGET_EINVAL;
+        }
+
         ts = g_new0(TaskState, 1);
         init_task_state(ts);
         /* we create a new CPU instance. */
@@ -6083,7 +6138,12 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         pthread_mutex_unlock(&clone_lock);
     } else {
         /* if no CLONE_VM, we consider it is a fork */
-        if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) {
+        if (flags & CLONE_INVALID_FORK_FLAGS) {
+            return -TARGET_EINVAL;
+        }
+
+        /* We can't support custom termination signals */
+        if ((flags & CSIGNAL) != TARGET_SIGCHLD) {
             return -TARGET_EINVAL;
         }
 
-- 
cgit v1.2.3


From 5457dc9e37fe0a29989bd64306c63941074864ce Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Fri, 8 Jul 2016 01:17:27 +0200
Subject: linux-user: fix TARGET_NR_select

TARGET_NR_select can have three different implementations:

  1- to always return -ENOSYS

     microblaze, ppc, ppc64

     -> TARGET_WANT_NI_OLD_SELECT

  2- to take parameters from a structure pointed by arg1
    (kernel sys_old_select)

     i386, arm, m68k

     -> TARGET_WANT_OLD_SYS_SELECT

  3- to take parameters from arg[1-5]
     (kernel sys_select)

     x86_64, alpha, s390x,
     cris, sparc, sparc64

Some (new) architectures don't define NR_select,

  4- but only NR__newselect with sys_select:

      mips, mips64, sh

  5- don't define NR__newselect, and use pselect6 syscall:

      aarch64, openrisc, tilegx, unicore32

Reported-by: Timothy Pearson <tpearson@raptorengineering.com>
Reported-by: Allan Wirth <awirth@akamai.com>
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 48 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 17 deletions(-)

(limited to 'linux-user/syscall.c')

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index d7e4d9ff2f..7aa2c1d720 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -1444,6 +1444,29 @@ static abi_long do_select(int n,
 
     return ret;
 }
+
+#if defined(TARGET_WANT_OLD_SYS_SELECT)
+static abi_long do_old_select(abi_ulong arg1)
+{
+    struct target_sel_arg_struct *sel;
+    abi_ulong inp, outp, exp, tvp;
+    long nsel;
+
+    if (!lock_user_struct(VERIFY_READ, sel, arg1, 1)) {
+        return -TARGET_EFAULT;
+    }
+
+    nsel = tswapal(sel->n);
+    inp = tswapal(sel->inp);
+    outp = tswapal(sel->outp);
+    exp = tswapal(sel->exp);
+    tvp = tswapal(sel->tvp);
+
+    unlock_user_struct(sel, arg1, 0);
+
+    return do_select(nsel, inp, outp, exp, tvp);
+}
+#endif
 #endif
 
 static abi_long do_pipe2(int host_pipe[], int flags)
@@ -8668,24 +8691,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
 #if defined(TARGET_NR_select)
     case TARGET_NR_select:
-#if defined(TARGET_S390X) || defined(TARGET_ALPHA)
-        ret = do_select(arg1, arg2, arg3, arg4, arg5);
+#if defined(TARGET_WANT_NI_OLD_SELECT)
+        /* some architectures used to have old_select here
+         * but now ENOSYS it.
+         */
+        ret = -TARGET_ENOSYS;
+#elif defined(TARGET_WANT_OLD_SYS_SELECT)
+        ret = do_old_select(arg1);
 #else
-        {
-            struct target_sel_arg_struct *sel;
-            abi_ulong inp, outp, exp, tvp;
-            long nsel;
-
-            if (!lock_user_struct(VERIFY_READ, sel, arg1, 1))
-                goto efault;
-            nsel = tswapal(sel->n);
-            inp = tswapal(sel->inp);
-            outp = tswapal(sel->outp);
-            exp = tswapal(sel->exp);
-            tvp = tswapal(sel->tvp);
-            unlock_user_struct(sel, arg1, 0);
-            ret = do_select(nsel, inp, outp, exp, tvp);
-        }
+        ret = do_select(arg1, arg2, arg3, arg4, arg5);
 #endif
         break;
 #endif
-- 
cgit v1.2.3