summary refs log tree commit diff
path: root/bin
diff options
author PMR <pmr@pmr-lander> 2020-07-15 16:16:09 +0000
committer PMR <pmr@pmr-lander> 2020-07-15 16:16:09 +0000
commit f210ce4d8305dc63cc9270be536eb90e255638ba (patch)
tree e97be1a95045634a9232da9ad2cb72979fbcd92c /bin
parent e7a5dfb7d6cab64358ad34f0cb563ffab9639500 (diff)
parent 36b00ba2ea1a33dc12fcec2f04badfb8d1513705 (diff)
Merge #386438 from ~rodsmith/plainbox-provider-checkbox:make-stress-ng-use-library
Modify stress_ng_test to use new disk library
Diffstat (limited to 'bin')
-rwxr-xr-x bin/stress_ng_test 484
1 files changed, 125 insertions, 359 deletions
diff --git a/bin/stress_ng_test b/bin/stress_ng_test
index 1bc384a..b8c2586 100755
--- a/bin/stress_ng_test
+++ b/bin/stress_ng_test
@@ -1,25 +1,24 @@
#!/usr/bin/env python3
-"""
-Copyright (C) 2020 Canonical Ltd.
-
-Authors
- Rod Smith <rod.smith@canonical.com>
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License version 3,
-as published by the Free Software Foundation.
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+# Copyright (C) 2020 Canonical Ltd.
+#
+# Authors
+# Rod Smith <rod.smith@canonical.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3,
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-The purpose of this script is to run CPU, memory, and disk stress
-tests using the stress-ng binary program. It replaces the older
-cpu_stress, memory_stress_ng, and disk_stress_ng Bash scripts.
+"""
+Run CPU, memory, and disk stress tests using the stress-ng binary program.
"""
@@ -29,43 +28,41 @@ from argparse import (
)
from subprocess import (
CalledProcessError,
+ check_output,
PIPE,
Popen,
+ run,
STDOUT,
TimeoutExpired
)
import os
-import psutil
import shlex
import shutil
-import signal
import stat
import sys
import time
import uuid
+import psutil
+from checkbox_support.disk_support import Disk
-# 10GiB (smallest acceptable size for disk tests):
-min_fs_size = 10 * 1024 * 1024 * 1024
# Swap filename
my_swap = None
-class stress_ng():
- """Interfaces with the external stress-ng binary -- accepts
- test parameters, runs the test, and enables access to test
- results."""
+class StressNg():
+ """Interface with the external stress-ng binary."""
+ # Accepts test parameters, runs the test, and enables access to test
+ # results.
def __init__(self,
- stressors=['str'],
- wrapper_timeout=25,
- sng_timeout=20,
- test_dir="/tmp",
+ stressors,
+ wrapper_timeout,
+ sng_timeout,
extra_options=""):
self.stressors = stressors
self.wrapper_timeout = wrapper_timeout
self.sng_timeout = sng_timeout
- self.test_dir = test_dir
self.extra_options = extra_options
self.results = ""
self.returncode = 0
@@ -75,7 +72,7 @@ class stress_ng():
stressor_list = "--" + " 0 --".join(self.stressors)
command = "stress-ng --aggressive --verify --timeout {} {} {} 0". \
- format(int(self.sng_timeout),
+ format(self.sng_timeout,
self.extra_options,
stressor_list)
time_str = time.strftime("%d %b %H:%M", time.gmtime())
@@ -83,41 +80,33 @@ class stress_ng():
print("{}: Running stress-ng {} stressor for {:.0f} seconds...".
format(time_str, self.stressors[0], self.sng_timeout))
else:
- print("{}: Running multiple stress-ng ".format(time_str) +
- "stressors in parallel for {:.0f}".format(self.sng_timeout))
+ print("{}: Running multiple stress-ng stressors in "
+ "parallel for {:.0f}".format(time_str, self.sng_timeout))
print("seconds...")
try:
- run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
- local_results = run.communicate(timeout=self.wrapper_timeout)[0]
- self.results = (local_results.decode(encoding="utf-8",
- errors="ignore"))
- self.returncode = run.returncode
- if self.returncode != 0:
- print("stress_ng exited with code {}".format(self.returncode))
+ self.results = check_output(
+ shlex.split(command), timeout=self.wrapper_timeout).decode(
+ encoding=sys.stdout.encoding)
except CalledProcessError as err:
- print("stress_ng exited with code {}".format(err.returncode))
- self.results = err.stdout
- self.returncode = run.returncode
+ print("** stress-ng exited with code {}".format(err.returncode))
+ self.results = err.stdout.decode(encoding="utf-8")
+ self.returncode = err.returncode
except TimeoutExpired:
- print("stress_ng timed out!")
- os.kill(run.pid, signal.SIGINT)
+ print("** stress-ng timed out and was forcefully terminated")
self.results = ""
- # For consistency with old bash script & "timeout" wrapper...
- self.returncode = 124
+ self.returncode = 1
except KeyboardInterrupt:
self.results = ""
- self.returncode = 125
- return self.returncode
-
- def get_results(self):
- return self.results
-
- def get_returncode(self):
+ print("** stress-ng test was terminated by SIGINT (Ctrl+C)!")
+ self.returncode = 1
+ except FileNotFoundError:
+ print("** stress-ng binary not found!")
+ self.results = ""
+ self.returncode = 1
return self.returncode
-"""Define CPU-related functions..."""
-
+# Define CPU-related functions...
def stress_cpu(args):
"""Run stress-ng tests on CPUs."""
@@ -128,64 +117,51 @@ def stress_cpu(args):
'tsearch', 'vecmath', 'wcs']
# Add 10% to runtime; will forcefully terminate if stress-ng
# fails to return in that time.
- end_time = args.base_time * 11 / 10
+ end_time = 1.1 * args.base_time
print("Estimated total run time is {:.0f} minutes\n".
- format(args.base_time/60))
+ format(args.base_time / 60))
- test_object = stress_ng(stressors=stressors,
- sng_timeout=args.base_time,
- wrapper_timeout=end_time,
- extra_options="--metrics-brief --tz --times")
+ test_object = StressNg(stressors=stressors,
+ sng_timeout=args.base_time,
+ wrapper_timeout=end_time,
+ extra_options="--metrics-brief --tz --times")
retval = test_object.run()
- print(test_object.get_results())
+ print(test_object.results)
return retval
-"""Define memory-related functions..."""
-
+# Define memory-related functions...
def num_numa_nodes():
"""Return the number of NUMA nodes supported by the CPU."""
- if shutil.which("numactl") is None:
+ try:
+ return int(run(['numactl', '--hardware'],
+ stdout=PIPE).stdout.split()[1])
+ except:
return 1
- else:
- command = "numactl --hardware"
- numactl = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
- local_results = numactl.communicate()[0].split()
- # local_results[1] will sometimes hold the number of NUMA nodes;
- # but "numactl --hardware" sometimes returns the error message
- # "No NUMA available on this system", so if this (or some other)
- # error message appears, assume one NUMA node....
- try:
- num_nodes = int(local_results[1])
- except ValueError:
- num_nodes = 1
- return num_nodes
def swap_space_ok(args):
- """Check available swap space. If too small, add more. The minimum
- acceptable mount is defined as the GREATER of the amount specified
- by the command-line -s/--swap-space option OR the amount specified
- by the STRESS_NG_MIN_SWAP_SIZE environment variable. Both values are
- specified in gibibytes (GiB). If neither is specified, a value of 0
- (no swap required) is assumed.
- Returns:
- - True if OK (already or after adding more)
- - False if insufficient swap space"""
+ """Check available swap space."""
+ # If swap space is too small, add more. The minimum
+ # acceptable amount is defined as the GREATER of the amount specified
+ # by the command-line -s/--swap-space option OR the amount specified
+ # by the STRESS_NG_MIN_SWAP_SIZE environment variable. Both values are
+ # specified in gibibytes (GiB). If neither is specified, a value of 0
+ # (no swap required) is assumed.
+ # Returns:
+ # - True if OK (already or after adding more)
+ # - False if insufficient swap space
- retval = 0
all_ok = True
global my_swap
min_swap_space = 0
- if "STRESS_NG_MIN_SWAP_SIZE" in os.environ:
- min_swap_space = int(os.environ['STRESS_NG_MIN_SWAP_SIZE']) \
- * 1024 * 1024 * 1024
- if args.swap_size > min_swap_space:
- min_swap_space = args.swap_size * 1024 * 1024 * 1024
- print("Minimum swap space is set to {:.0f} GiB".
- format(min_swap_space / 1024 / 1024 / 1024))
+
+ swap_size = max(os.environ.get('STRESS_NG_MIN_SWAP_SPACE', 0),
+ args.swap_size)
+ print("Minimum swap space is set to {} GiB".format(swap_size))
+ min_swap_space = swap_size * 1024 ** 3
swap = psutil.swap_memory()
if swap.total < min_swap_space:
print("Swap space too small! Attempting to add more (this may take " +
@@ -201,26 +177,23 @@ def swap_space_ok(args):
# use that potentially exceeds available RAM....
for i in range(int((min_swap_space + 10240) / 1024)):
f.write(b"\x00" * 1024)
- f.close()
+ f.flush()
except OSError:
print("Unable to create temporary swap file! Aborting test!")
- f.close()
- os.remove(my_swap)
+ try:
+ # In case the file was partially written but errored out
+ # (say, because of a lack of disk space)
+ os.remove(my_swap)
+ except FileNotFoundError:
+ # This exception will happen if the file doesn't exist at all
+ pass
all_ok = False
if all_ok:
os.chmod(my_swap, stat.S_IRUSR | stat.S_IWUSR)
- cmd = "mkswap {}".format(my_swap)
- Popen(shlex.split(cmd), stderr=STDOUT, stdout=PIPE).communicate()[0]
- cmd = "swapon {}".format(my_swap)
- Popen(shlex.split(cmd), stderr=STDOUT, stdout=PIPE).communicate()[0]
- else:
- retval = False
+ run(['mkswap', my_swap])
+ run(['swapon', my_swap])
swap = psutil.swap_memory()
- if swap.total < min_swap_space:
- retval = False
- else:
- retval = True
- return retval
+ return swap.total >= min_swap_space
def stress_memory(args):
@@ -228,14 +201,16 @@ def stress_memory(args):
retval = 0
if not swap_space_ok(args):
- return 130
+ print("** Swap space unavailable! Please activate swap space " +
+ "and re-run this test!")
+ return 1
ram = psutil.virtual_memory()
- total_mem_in_gb = ram.total / 1073741824
+ total_mem_in_gb = ram.total / (1024 ** 3)
vrt = args.base_time + total_mem_in_gb * args.time_per_gig
print("Total memory is {:.1f} GiB".format(total_mem_in_gb))
- print("Constant run time is {} seconds per stressor".
- format(args.base_time))
+ print("Constant run time is {} seconds per stressor".format(
+ args.base_time))
print("Variable run time is {:.0f} seconds per stressor".format(vrt))
print("Number of NUMA nodes is {}".format(num_numa_nodes()))
@@ -255,18 +230,18 @@ def stress_memory(args):
est_runtime = len(crt_stressors) * args.base_time + \
len(vrt_stressors) * vrt
print("Estimated total run time is {:.0f} minutes\n".
- format(est_runtime/60))
+ format(est_runtime / 60))
for stressor in crt_stressors:
- test_object = stress_ng(stressors=stressor.split(),
- sng_timeout=args.base_time,
- wrapper_timeout=args.base_time*2)
+ test_object = StressNg(stressors=stressor.split(),
+ sng_timeout=args.base_time,
+ wrapper_timeout=args.base_time*2)
retval = retval | test_object.run()
- print(test_object.get_results())
+ print(test_object.results)
for stressor in vrt_stressors:
- test_object = stress_ng(stressors=stressor.split(), sng_timeout=vrt,
- wrapper_timeout=vrt*2)
+ test_object = StressNg(stressors=stressor.split(), sng_timeout=vrt,
+ wrapper_timeout=vrt*2)
retval = retval | test_object.run()
- print(test_object.get_results())
+ print(test_object.results)
if my_swap is not None and args.keep_swap is False:
print("Deleting temporary swap file....")
cmd = "swapoff {}".format(my_swap)
@@ -275,187 +250,6 @@ def stress_memory(args):
return retval
-"""Define disk-related functions..."""
-
-
-def get_partition_data(file):
- """Get partition details (size & type) on /dev/{file} & return in
- dictionary."""
-
- part_data = {}
- part_data['name'] = file
-
- # Get size of device, in bytes....
- command = "blockdev --getsize64 /dev/{}".format(file)
- run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
- part_data['size'] = int(run.communicate()[0])
-
- # Get filesystem type....
- part_data['fs_type'] = ""
- command = "blkid /dev/{} -o export".format(file)
- run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
- local_results = run.communicate()[0].split()
- for result in local_results:
- result_str = result.decode(encoding="utf-8", errors="ignore")
- if "TYPE" in result_str:
- part_data['fs_type'] = result_str.split("=")[1]
- return part_data
-
-
-def find_mount_point(file):
- """Find the mount point of /dev/{file}.
- Returns:
- * None if unmounted
- * The mount point (as a string) if it's mounted."""
-
- mount_point = None
- command = "df /dev/{} --output=target".format(file)
- run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
- output = run.communicate()[0].decode(encoding="utf-8", errors="ignore"). \
- split()
- potential_mount_point = str(output[-1])
- # If df is fed a non-mounted-partition, it returns "/dev" as the
- # mount point, so ignore that....
- if potential_mount_point != "/dev":
- mount_point = potential_mount_point
- return mount_point
-
-
-class disk():
- """Interfaces to disk device, to check device status, find largest
- partition, mount it, etc."""
-
- def __init__(self, device=""):
- self.device = device
- self.all_parts = []
- self.unsupported_fs = None
- self.test_dir = "/tmp"
- lvm_detected = False
- # Find final element of device name; for instance "sda" for "/dev/sda"
- stripped_devname = self.device.split("/")[-1]
-
- # Do first pass to collect data on partitions & software RAID
- # devices (which we treat like partitions)....
- for file in os.listdir("/sys/class/block"):
- if stripped_devname in file:
- part_data = get_partition_data(file)
- part_data['part_type'] = "partition"
- if part_data['fs_type'] == "LVM2_member":
- lvm_detected = True
- self.all_parts.append(part_data)
-
- # Do another pass to collect data on logical volumes, if any exist
- # on the target device....
- # NOTE: This code ignores where an LVM exists; it could span multiple
- # disks, or be on one other than the one being tested. Canonical
- # certification specifies use of partitions, not LVMs, so this code
- # exists mainly for software development using development systems,
- # not on servers actually being tested.
- if lvm_detected:
- for file in os.listdir("/sys/class/block/"):
- if "dm-" in file:
- part_data = get_partition_data(file)
- part_data['part_type'] = "lv"
- self.all_parts.append(part_data)
-
- def is_block_device(self):
- try:
- mode = os.stat(self.device).st_mode
- if not stat.S_ISBLK(mode):
- print("{} is NOT a block device! Aborting!".
- format(self.device))
- return False
- except FileNotFoundError:
- print("{} does not exist! Aborting!".format(self.device))
- return False
- return True
-
- def find_largest_partition(self):
- """Find the largest partition that holds a supported filesystem on
- self.device. Sets:
- self.largest_part -- Dictionary containing information on largest
- partition
- self.unsupported_fs -- Empty or contains information about largest
- unsupported filesystem (of certain known types)
- found on disk"""
-
- self.largest_part = {'name': "",
- 'size': 0,
- 'part_type': "lv",
- 'fs_type': ""}
- self.unsupported_fs = None
-
- # A filesystem can be supported for the test; unsupported but worth
- # noting in an error message; or unsupported and not worth noting.
- # The first two categories are enumerated in lists....
- supported_filesystems = ['ext2', 'ext3', 'ext4', 'xfs', 'jfs', 'btrfs']
- unsupported_filesystems = ['ntfs', 'vfat', 'hfs', 'LVM2_member']
-
- for part in self.all_parts:
- new_sz = int(part['size'])
- old_sz = int(self.largest_part['size'])
- new_lv = part['part_type'] == "lv"
- old_lv = self.largest_part['part_type'] == "lv"
- if (new_sz > 0 and old_sz == 0) or \
- (new_sz > min_fs_size and old_sz < min_fs_size) or \
- (new_sz > min_fs_size and new_sz > old_sz and old_lv) or \
- (new_sz > old_sz and not new_lv):
- if part['fs_type'] in supported_filesystems:
- self.largest_part = part
- elif part['fs_type'] in unsupported_filesystems:
- # Make note of it if it might be an old filesystem
- # that was not properly re-allocated....
- self.unsupported_fs = part
- return self.largest_part
-
- def mount_filesystem(self, simulate):
- print("Disk device is {}".format(self.device))
- target_part = self.find_largest_partition()
- if target_part['name'] == "":
- if self.unsupported_fs is not None:
- print("A filesystem of type {} was found, but is not "
- "supported by this test.".
- format(self.unsupported_fs['fs_type']))
- print("A Linux-native filesystem (ext2/3/4fs, XFS, JFS, or "
- "Btrfs) is required.")
- else:
- print("No suitable partition found!")
- return False
-
- if target_part['size'] < min_fs_size:
- print("Warning: {} is less than {:.0f} GiB in size!".
- format(target_part['name'], min_fs_size/1024/1024/1024))
- print("Disk is too small to test. Aborting test!")
- return False
-
- full_device = "/dev/{}".format(target_part['name'])
- print("Testing partition {}".format(full_device))
- mount_point = find_mount_point(target_part['name'])
- if simulate:
- print("Run with --simulate, so not mounting filesystems.")
- print("If run without --simulate, would mount {} to {}".
- format(full_device, mount_point))
- print("(if not already mounted).")
- else:
- if not mount_point:
- mount_point = "/mnt/{}".format(target_part['name'])
- print("Trying to mount {} to {}...".
- format(full_device, mount_point))
- os.makedirs(mount_point, exist_ok=True)
- command = "mount {} {}".format(full_device, mount_point)
- run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
- output = run.communicate()[0].decode(encoding="utf-8",
- errors="ignore")
- print(output)
- else:
- print("{} is already mounted at {}".
- format(full_device, mount_point))
- self.test_dir = "{}/tmp/stress-ng-{}".format(mount_point,
- uuid.uuid1())
- os.makedirs(self.test_dir, exist_ok=True)
- return True
-
-
def stress_disk(args):
"""Run stress-ng tests on disk."""
@@ -469,9 +263,10 @@ def stress_disk(args):
if "/dev" not in args.device and args.device != "":
args.device = "/dev/" + args.device
- test_disk = disk(args.device)
+ test_disk = Disk(args.device)
if not test_disk.is_block_device():
- return 131
+ print("** {} is not a block device! Aborting!".format(args.device))
+ return 1
if test_disk.mount_filesystem(args.simulate):
est_runtime = len(disk_stressors) * args.base_time
print("Using test directory: '{}'".format(test_disk.test_dir))
@@ -482,22 +277,22 @@ def stress_disk(args):
for stressor in disk_stressors:
disk_options = "--temp-path {} ".format(test_disk.test_dir) + \
"--hdd-opts dsync --readahead-bytes 16M -k"
- test_object = stress_ng(stressors=stressor.split(),
- sng_timeout=args.base_time,
- wrapper_timeout=args.base_time*5,
- extra_options=disk_options)
+ test_object = StressNg(stressors=stressor.split(),
+ sng_timeout=args.base_time,
+ wrapper_timeout=args.base_time*5,
+ extra_options=disk_options)
retval = retval | test_object.run()
- print(test_object.get_results())
+ print(test_object.results)
if test_disk.test_dir != "/tmp" and not args.simulate:
shutil.rmtree(test_disk.test_dir, ignore_errors=True)
else:
- retval = 132
+ print("** Unable to find a suitable partition! Aborting!")
+ retval = 1
return retval
-"""Main program body..."""
-
+# Main program body...
def main():
"""Run a stress_ng-based stress run."""
@@ -512,9 +307,6 @@ def main():
memory_parser = subparsers.add_parser('memory', help=("Run memory tests"))
disk_parser = subparsers.add_parser('disk', help=("Run disk tests"))
- # Sub test options
- # action = test_parser.add_mutually_exclusive_group()
-
# CPU parameters
cpu_parser.add_argument("-b", "--base-time", type=int, default=7200,
help="Run time, in seconds (default=7200)")
@@ -524,8 +316,8 @@ def main():
help="Base time for each test, in seconds " +
"(default=300)", default=300)
memory_parser.add_argument("-t", "--time-per-gig", type=int,
- help="Extra time per GiB for some stressors " +
- "(default=10)", default=10)
+ help="Extra time per GiB for some stressors," +
+ " in seconds (default=10)", default=10)
memory_parser.add_argument("-s", "--swap-size", type=int,
help="swap size in GiB", default=0)
memory_parser.add_argument("-k", "--keep-swap", action="store_true",
@@ -546,49 +338,23 @@ def main():
args = parser.parse_args()
- # logging.basicConfig(level=logging.INFO)
-
if shutil.which("stress-ng") is None:
- print("The stress-ng utility is not installed; exiting!")
- return(128)
+ print("** The stress-ng utility is not installed; exiting!")
+ return 1
if not os.geteuid() == 0:
- print("This program must be run as root (or via sudo); exiting!")
- return(129)
+ print("** This program must be run as root (or via sudo); exiting!")
+ return 1
- retval = 1
- if 'func' not in args:
- parser.print_help()
+ retval = args.func(args)
+ print("retval is {}".format(retval))
+ print("*" * 62)
+ if retval == 0:
+ print("* stress-ng test passed!")
else:
- retval = args.func(args)
- print("**************************************************************")
- if retval == 0:
- print("* stress-ng test passed!")
- elif retval == 124: # Terminated by Python timeout
- print("** stress-ng test timed out and was forcefully ")
- print(" terminated (Error {})".format(retval))
- elif retval == 125: # Terminated by SIGINT
- print("** stress-ng test timed out and SIGINT (Ctrl+C) " +
- "was used to terminate")
- print(" the test (Error {})!".format(retval))
- elif retval == 130: # Insufficient swap space for memory test
- print("** Swap space unavailable! Please activate swap space " +
- "and re-run this test!")
- print(" (Error {})".format(retval))
- elif retval == 131: # Alleged disk device is not a device file
- print("** {} is not a block device! Aborting!".format(args.device))
- print(" (Error {})".format(retval))
- elif retval == 132: # Unable to find a partition for disk test
- print("** Unable to find a suitable partition! Aborting!")
- print(" (Error {})".format(retval))
- elif retval == 137: # Terminated by SIGKILL
- print("** stress-ng test timed out and SIGKILL was used to ")
- print(" terminate the test (Error {})!".format(retval))
- else:
- print("stress-ng test failed with return code: {}".format(retval))
- print("**************************************************************")
+ print("** stress-ng test failed!")
+ print("*" * 62)
- return(retval)
+ return retval
-if __name__ == '__main__':
- sys.exit(main())
+sys.exit(main())