Skip to content
16 changes: 14 additions & 2 deletions metric_providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,18 @@ def check_system(self):

# implemented as getter function and not direct access, so it can be overloaded
# some child classes might not actually have _ps attribute set
#
# This function has to go through quite some hoops to read the stderr
# The preferred way to communicate with processes is through communicate()
# However this function ALWAYS waits for the process to terminate and it does not allow reading from processes
# in chunks while they are running. Thus we we cannot set encoding='UTF-8' in Popen and must decode here.
def get_stderr(self):
return self._ps.stderr.read()
stderr_read = ''
if self._ps.stderr is not None:
stderr_read = self._ps.stderr.read()
if isinstance(stderr_read, bytes):
stderr_read = stderr_read.decode('utf-8')
return stderr_read
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we always return the empty string we can remove quite a few checks for stderr being None


def has_started(self):
return self._has_started
Expand Down Expand Up @@ -131,7 +141,9 @@ def start_profiling(self, containers=None):
[call_string],
shell=True,
preexec_fn=os.setsid,
stderr=subprocess.PIPE
stderr=subprocess.PIPE,
#encoding='UTF-8' # we cannot set this option here as reading later will then flake with "can't concat NoneType to bytes"
# see get_stderr() for additional details
# since we are launching the command with shell=True we cannot use ps.terminate() / ps.kill().
# This would just kill the executing shell, but not it's child and make the process an orphan.
# therefore we use os.setsid here and later call os.getpgid(pid) to get process group that the shell
Expand Down
2 changes: 1 addition & 1 deletion metric_providers/powermetrics/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def read_metrics(self, run_id, containers=None):
def get_stderr(self):
stderr = super().get_stderr()

if stderr is not None and str(stderr).find('proc_pid') != -1:
if stderr.find('proc_pid') != -1:
return None

return stderr
8 changes: 4 additions & 4 deletions runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,7 @@ def start_metric_providers(self, allow_container=True, allow_other=True):

stderr_read = metric_provider.get_stderr()
print(f"Stderr check on {metric_provider.__class__.__name__}")
if stderr_read is not None and str(stderr_read):
if stderr_read:
raise RuntimeError(f"Stderr on {metric_provider.__class__.__name__} was NOT empty: {stderr_read}")


Expand Down Expand Up @@ -1100,7 +1100,7 @@ def stop_metric_providers(self):
continue

stderr_read = metric_provider.get_stderr()
if stderr_read is not None and str(stderr_read):
if stderr_read:
errors.append(f"Stderr on {metric_provider.__class__.__name__} was NOT empty: {stderr_read}")

# pylint: disable=broad-exception-caught
Expand Down Expand Up @@ -1141,6 +1141,7 @@ def read_and_cleanup_processes(self):

for ps in self.__ps_to_read:
if ps['detach']:
# communicate will only really work to our experience if the process is killed before
stdout, stderr = ps['ps'].communicate(timeout=5)
else:
stdout = ps['ps'].stdout
Expand All @@ -1159,8 +1160,7 @@ def read_and_cleanup_processes(self):
if match := re.findall(r'GMT_SCI_R=(\d+)', line):
self._sci['R'] += int(match[0])
if stderr:
stderr = stderr.splitlines()
for line in stderr:
for line in stderr.splitlines():
print('stderr from process:', ps['cmd'], line)
self.add_to_log(ps['container_name'], f"stderr: {line}", ps['cmd'])

Expand Down