diff --git a/testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py b/testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py --- a/testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py +++ b/testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py @@ -284,57 +284,71 @@ class SystemResourceMonitor(object): """ if not self._process: self._stopped = True return assert self._running assert not self._stopped - self._pipe.send(('terminate',)) + try: + self._pipe.send(('terminate',)) + except Exception: + pass self._running = False self._stopped = True self.measurements = [] - done = False - # The child process will send each data sample over the pipe # as a separate data structure. When it has finished sending # samples, it sends a special "done" message to indicate it # is finished. - while self._pipe.poll(1.0): - start_time, end_time, io_diff, cpu_diff, cpu_percent, virt_mem, \ - swap_mem = self._pipe.recv() + + # multiprocessing.Pipe is not actually a pipe on at least Linux. that + # has an effect on the expected outcome of reading from it when the + # other end of the pipe dies, leading to possibly hanging on revc() + # below. So we must poll(). + def poll(): + try: + return self._pipe.poll(0.1) + except Exception: + # Poll might throw an exception even though there's still + # data to read. That happens when the underlying system call + # returns both POLLERR and POLLIN, but python doesn't tell us + # about it. So assume there is something to read, and we'll + # get an exception when trying to read the data. + return True + while poll(): + try: + start_time, end_time, io_diff, cpu_diff, cpu_percent, virt_mem, \ + swap_mem = self._pipe.recv() + except Exception: + # Let's assume we're done here + break # There should be nothing after the "done" message so # terminate. if start_time == 'done': - done = True break io = self._io_type(*io_diff) virt = self._virt_type(*virt_mem) swap = self._swap_type(*swap_mem) cpu_times = [self._cpu_times_type(*v) for v in cpu_diff] self.measurements.append(SystemResourceUsage(start_time, end_time, cpu_times, cpu_percent, io, virt, swap)) # We establish a timeout so we don't hang forever if the child # process has crashed. self._process.join(10) if self._process.is_alive(): self._process.terminate() self._process.join(10) - else: - # We should have received a "done" message from the - # child indicating it shut down properly. This only - # happens if the child shuts down cleanly. - assert done if len(self.measurements): self.start_time = self.measurements[0].start self.end_time = self.measurements[-1].end # Methods to record events alongside the monitored data. def record_event(self, name):