Merge #379215 from ~rodsmith/plainbox-provider-checkbox:unify-stress-ng-wrapper-scripts-2

Replace three stress-ng wrapper scripts with one unified script.
author: PMR <pmr@pmr-lander> 2020-02-25 16:35:50 +0000
committer: PMR <pmr@pmr-lander> 2020-02-25 16:35:50 +0000
commit: 0c06eadd115b2cf015b8edcb756fe9236c151753 (patch)
tree: 991c7b64a43a8803434358237fa40ca4f09d80c8
parent: b5986748a745f0c41231739d82e31b5fea03b885 (diff)
parent: 7f54a248b998a84b58cf26f4e53652766ff10f71 (diff)
7 files changed, 600 insertions, 600 deletions
diff --git a/bin/cpu_stress b/bin/cpu_stress
deleted file mode 100755
index 56fccee..0000000
--- a/bin/cpu_stress
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/bin/sh
-
-# Script to perform CPU stress tests
-#
-# Copyright (c) 2016 Canonical Ltd.
-#
-# Authors
-# Rod Smith <rod.smith@canonical.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 3,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# The purpose of this script is to run CPU stress tests using the
-# stress-ng program.
-#
-# Usage:
-# cpu_stress [ --runtime <time-in-seconds> ]
-#
-# If --runtime is not specified, it defaults to 7200 (2 hours).
-
-runtime=7200
-if [ "$#" = "2" ] && [ "$1" = "--runtime" ] && [ "$2" -eq "$2" ] ; then
- runtime=$2
-elif [ "$#" != "0" ] ; then
- echo "Usage:"
- echo " $0 [ --runtime <time-in-seconds> ]"
- exit 1
-fi
-echo "Setting run time to $runtime seconds"
-# Add 10% to runtime; will forcefully terminate if stress-ng
-# fails to return in that time.
-end_time=$((runtime*11/10))
-
-# NOTE:
-# Options --af-alg 0 through --wcs 0 specify CPU stressors. As of stress-ng
-# version 0.05.12, this is equivalent to --class cpu --all 0 --exclude numa,cpu_online.
-# This script specifies stressors individually because the list of stressors keeps
-# increasing, and we want consistency -- if the stress-ng version bumps up, we
-# don't want new stressors being run. We're omitting numa because it's most
-# useful on systems with massive numbers of CPUs, and cpu_online because it's
-# failed on 4 of 8 test systems, so it seems too strict.
-# Use "timeout" command to launch stress-ng, to catch it should it go into la-la land
-timeout -s 9 $end_time stress-ng --aggressive --verify --timeout $runtime \
- --metrics-brief --tz --times \
- --af-alg 0 --bsearch 0 --context 0 --cpu 0 \
- --crypt 0 --hsearch 0 --longjmp 0 --lsearch 0 \
- --matrix 0 --qsort 0 --str 0 --stream 0 \
- --tsearch 0 --vecmath 0 --wcs 0
-result="$?"
-
-echo "**********************************************************"
-if [ $result = "0" ] ; then
- echo "* stress-ng CPU test passed!"
-else
- if [ $result = "137" ] ; then
- echo "** stress-ng CPU test timed out and SIGKILL was used to " \
- "terminate the test (Error $result)!"
- elif [ $return_code = "124" ] ; then
- echo "* stress-ng CPU test timed out and was forcefully terminated " \
- "(Error $result)!"
- else
- echo "* stress-ng CPU test failed with result $result"
- fi
-fi
-echo "**********************************************************"
-exit $result
diff --git a/bin/disk_stress_ng b/bin/disk_stress_ng
deleted file mode 100755
index 9d8668e..0000000
--- a/bin/disk_stress_ng
+++ /dev/null
@@ -1,330 +0,0 @@
-#!/bin/bash
-
-# Script to disk stress tests using stress-ng
-#
-# Copyright (c) 2016 Canonical Ltd.
-#
-# Authors
-# Rod Smith <rod.smith@canonical.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 3,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# The purpose of this script is to run disk stress tests using the
-# stress-ng program.
-#
-# Usage:
-# disk_stress_ng [ <device-filename> ]
-# [ --base-time <time-in-seconds> ]
-# [ --really-run ]
-#
-# Parameters:
-# --disk-device -- This is the WHOLE-DISK device filename WITHOUT "/dev/"
-# (e.g., sda). The script finds a filesystem on that
-# device, mounts it if necessary, and runs the tests on
-# that mounted filesystem.
-# Test with iostat
-
-get_params() {
- disk_device="/dev/sda"
- short_device="sda"
- base_time="240"
- really_run="N"
- while [ $# -gt 0 ] ; do
- case $1 in
- --base-time) base_time="$2"
- shift
- ;;
- --really-run) really_run="Y"
- ;;
- *) disk_device="/dev/$1"
- disk_device=`echo $disk_device | sed "s/\/dev\/\/dev/\/dev/g"`
- short_device=$(echo $disk_device | sed "s/\/dev//g")
- if [ ! -b $disk_device ] ; then
- echo "Unknown block device \"$disk_device\""
- echo "Usage: $0 [ --base-time <time-in-seconds> ] [ --really-run ]"
- echo " [ device-file ]"
- exit 1
- fi
- ;;
- esac
- shift
- done
- mounted_part="N"
-} # get_params()
-
-
-# Find the largest logical volume in an LVM partition.
-# Output:
-# $largest_part -- Device filename of largest qualifying partition
-# $largest_size -- Size of largest qualifying partition
-# $largest_fs -- Filesystem (ext4, etc.) used on largest qualifying partition
-# Note: Above variables are initialized in find_largest_partition(), which
-# calls this function.
-# Caveat: If LVM is used, there can be no guarantee that a specific disk
-# device is actually being tested. Thus, an LVM configuration should span
-# just one disk device. LVM may be used on one disk, but subsequent disks
-# should use "raw" partitions.
-find_largest_lv() {
- local partonly=$(echo $partition | cut -f 3 -d "/")
- for syslv in $(ls -d /sys/block/dm-*/slaves/$partonly) ; do
- lv=$(echo "$syslv" | cut -f 4 -d "/")
- size=$(cat /sys/block/$lv/size)
- sector_size=$(cat /sys/block/$lv/queue/hw_sector_size)
- let size=$size*$sector_size
- local blkid_info=$(blkid -s TYPE /dev/$lv | grep -E ext2\|ext3\|ext4\|xfs\|jfs\|btrfs)
- if [ "$size" -gt "$largest_size" ] && [ -n "$blkid_info" ] ; then
- local blkid_info=$(blkid -s TYPE /dev/$lv)
- largest_size=$size
- largest_part="/dev/$lv"
- largest_fs=$(blkid -s TYPE "/dev/$lv" | cut -d "=" -f 2)
- fi
- done
-} # find_largest_lv()
-
-
-# Find the largest partition that holds a supported filesystem on $disk_device.
-# Output:
-# $largest_part -- Device filename of largest qualifying partition or logical volume
-# $largest_size -- Size of largest qualifying partition or logical volume
-# $largest_fs -- Filesystem (ext4, etc.) used on largest qualifying partition or logicl volume
-# $unsupported_fs -- Empty or contains name of unsupported filesystem found on disk
-find_largest_partition() {
- largest_part=""
- largest_size=0
- mapper_string="dm-"
- if [ "${disk_device#*$mapper_string}" = "$disk_device" ]; then
- partitions=$(lsblk -b -l -n -o NAME,SIZE,TYPE,MOUNTPOINT $disk_device | grep -E 'part|lvm|raid' | tr -s " ")
- else
- partitions=$(lsblk -b -l -n -o NAME,SIZE,TYPE,MOUNTPOINT $disk_device)
- fi
- unsupported_fs=""
- for partition in $(echo "$partitions" | cut -d " " -f 1) ; do
- if [ -b "/dev/$partition" ]; then
- part_size=$(echo "$partitions" | grep "$partition " | cut -d " " -f 2)
- part_location="/dev/$partition"
- elif [ -b "/dev/mapper/$partition" ]; then
- part_size=$(echo "$partitions" | grep "$partition " | cut -d " " -f 2)
- part_location="/dev/mapper/$partition"
- else
- echo "$partition not found!"
- echo "Aborting test"
- exit 1
- fi
- local blkid_info=$(blkid -s TYPE $part_location | grep -E ext2\|ext3\|ext4\|xfs\|jfs\|btrfs\|LVM2_member)
- if [ "$part_size" -gt "$largest_size" ] && [ -n "$blkid_info" ] ; then
- if [[ "$blkid_info" =~ .*LVM2_member.* ]] ; then
- find_largest_lv
- else
- largest_size=$part_size
- largest_part="$part_location"
- largest_fs=$(blkid -s TYPE "$part_location" | cut -d "=" -f 2)
- fi
- fi
- local blkid_info=$(blkid -s TYPE $part_location | grep -E ntfs\|vfat\|hfs)
- if [ -n "$blkid_info" ] ; then
- # If there's an NTFS, HFS+, or FAT filesystem on the disk make note of it....
- unsupported_fs=$(blkid -s TYPE "/dev/$partition" | cut -d "=" -f 2)
- fi
- done
-} # find_largest_partition()
-
-# Find the largest filesystem on $disk_device. If that partition is not
-# already mounted, try to mount it.
-# Output:
-# $test_dir -- Directory in which tests will occur
-# $mount_point -- Location where filesystem is mounted
-# $mounted_part -- Sets to "Y" if script mounted partition
-# $made_mountpoint -- Sets to "Y" if script created the mount point
-mount_filesystem() {
- test_dir="/tmp/disk_stress_ng_$(uuidgen)"
- if [ -b $disk_device ]
- then
- echo "$disk_device is a block device"
-
- #Add a check for warnings
- WARN=$(parted -s ${disk_device} print | grep "^Warning.*${disk}.*[Rr]ead-only" 2>&1)
- if [[ $? == 0 ]]
- then
- echo "Warning found in parted output:"
- echo $WARN
- echo "Aborting Test"
- exit 1
- fi
- else
- echo "$disk_device is not a block device! Aborting!"
- exit 1
- fi
-
- find_largest_partition
- 
- if [ -n "$largest_part" ] ; then
- echo "Found largest partition: \"$largest_part\""
- # If largest partition is too small, just abort with a message
- if [ $largest_size -lt 10000000000 ] ; then
- echo "Warning: $largest_part is less than 10GiB in size"
- echo "Disk is too small to test. Aborting test!"
- exit 1
- fi
- mount_point=$(df | grep "$largest_part " | tr -s " " | cut -d " " -f 6)
- if [ "$mount_point" == "" ] && [ "$really_run" == "Y" ] ; then
- disk_device=$(echo $disk_device | sed "s/\/dev\/\/dev/\/dev/g")
- mount_point="/mnt$short_device"
- echo "No partition is mounted from $disk_device; attempting to mount one...."
- if [ ! -d $mount_point ] ; then
- mkdir -p "$mount_point"
- made_mountpoint="Y"
- fi
- mount "$largest_part" "$mount_point"
- mounted_part="Y"
- fi
- if [ "$mount_point" == "/" ] ; then
- test_dir="/tmp/disk_stress_ng_$(uuidgen)"
- else
- test_dir="$mount_point/tmp/disk_stress_ng_$(uuidgen)"
- fi
- echo "Test will use $largest_part, mounted at \"$mount_point\", using $largest_fs"
- else
- echo "There appears to be no partition with a suitable filesystem"
- echo "on $disk_device; please create a suitable partition and re-run"
- echo "this test."
- if [ -n "unsupported_fs" ] ; then
- echo "NOTE: A filesystem of type $unsupported_fs was found, but is not supported"
- echo "by this test. A Linux-native filesystem (ext2/3/4fs, XFS, JFS, or Btrfs)"
- echo "is required."
- fi
- exit 1
- fi
-} # mount_filesystem()
-
-
-# Run an individual stressor
-# Input:
-# $1 = stressor name (e.g., copyfile, dentry)
-# $2 = run time
-# Output:
-# had_error -- sets to "1" if an error occurred
-run_stressor() {
- local runtime="$2"
- # Multiply runtime by 5; will forcefully terminate if stress-ng
- # fails to return in that time.
- end_time=$((runtime*5))
- echo "Running stress-ng $1 stressor for $2 seconds...."
- # Use "timeout" command to launch stress-ng, to catch it should it go into
- # la-la land
- timeout -s 14 $end_time stress-ng --aggressive --verify --timeout $runtime \
- --temp-path $test_dir --$1 0 --hdd-opts dsync --readahead-bytes 16M -k
- return_code="$?"
- echo "return_code is $return_code"
- if [ "$return_code" != "0" ] ; then
-	#
-	# a small grace period to allow stressors to terminate
-	#
-	sleep 10
-	#
-	# still running? aggressively kill all stressors
-	#
-	pids=$(pidof stress-ng)
-	if [ -n "$pids" ]; then
-	kill -9 $pids
-	sleep 1
-	kill -9 $pids
-	pids=$(pidof stress-ng)
-	if [ -n "$pids" ]; then
-	echo "Note: stress-ng (PIDS $pids) could not be killed"
-	fi
-	fi
- had_error=1
- echo "*****************************************************************"
- if [ $return_code = "124" ] ; then
- echo "** stress-ng $stressor test timed out and was forcefully " \
- "terminated! (Error $return_code)"
- elif [ $return_code = "137" ] ; then
- echo "** stress-ng $stressor test timed out and SIGKILL was used to " \
- "terminate the test case! (Error $return_code)"
- else
- echo "** Error $return_code reported on stressor $stressor!)"
- fi
- echo "*****************************************************************"
- had_error=1
- result=$return_code
- fi
-} # run_stressor()
-
-
-#
-# Main program body....
-#
-
-
-get_params "$@"
-mount_filesystem
-echo "test_dir is $test_dir"
-
-had_error=0
-
-# Tests Colin said to try but that aren't present as of standard stress-ng
-# in Ubuntu 16.04:
-#
-# "chown" "copyfile" "ioprio" "locka" "lockofd" "madvise" "msync" "seal"
-#
-# TODO: Consider adding these tests for Ubuntu 18.04, or ealier with an
-# updated stress-ng in the certification PPA....
-
-disk_stressors=("aio" "aiol" "chdir" "chmod" "dentry" "dir" "fallocate" \
- "fiemap" "filename" "flock" "fstat" "hdd" "lease" "lockf" \
- "mknod" "readahead" "seek" "sync-file" "xattr")
-
-total_runtime=$((${#disk_stressors[@]}*$base_time))
-
-#
-# Ensure we have emnough async I/O events available, scale it
-# based on number of CPUs on the machine
-#
-if [ -e /proc/sys/fs/aio-max-nr ] ; then
-	aiomax=$((8192 * $(nproc)))
-	aionow=$(cat /proc/sys/fs/aio-max-nr)
-	if [ $aiomax -gt $aionow ] ; then
-	echo $aiomax > /proc/sys/fs/aio-max-nr
-	echo "Setting aio-max-nr to $aiomax"
-	fi
-fi
-
-echo "Estimated total run time is $total_runtime seconds"
-echo ""
-
-if [ "$really_run" == "Y" ] ; then
- mkdir -p "$test_dir"
- for stressor in ${disk_stressors[@]}; do
- run_stressor $stressor $base_time
- done
- rm -rf "$test_dir"
- if [ "$mounted_part" == "Y" ] ; then
- umount "$mount_point"
- if [ "$made_mountpoint" == "Y" ] ; then
- rmdir "$mount_point"
- fi
- fi
-else
- echo "To actually run tests, pass the --really-run option."
- echo "Script is now terminating...."
- exit 1
-fi
-
-echo "*******************************************************************"
-if [ $had_error = "0" ] ; then
- echo "** stress-ng disk test passed!"
-else
- echo "** stress-ng disk test failed; most recent error was $result"
-fi
-echo "*******************************************************************"
-exit $result
diff --git a/bin/memory_stress_ng b/bin/memory_stress_ng
deleted file mode 100755
index e717f12..0000000
--- a/bin/memory_stress_ng
+++ /dev/null
@@ -1,190 +0,0 @@
-#!/bin/bash
-
-# Script to perform memory stress tests
-#
-# Copyright (c) 2016 Canonical Ltd.
-#
-# Authors
-# Rod Smith <rod.smith@canonical.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 3,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# The purpose of this script is to run memory stress tests using the
-# stress-ng program. It also happens to impose a heavy CPU load, but
-# that's a side effect of the memory stressors, not their purpose.
-#
-# Usage:
-# memory_stress_ng [ --base-time <time> ] [ --time-per-gig <time> ]
-#
-# Parameters:
-# --base-time is the time in seconds to run each stressor. (The default
-# is 300 seconds, or five minutes.)
-# --time-per-gig is extra time given to SOME stressors, measured in a
-# seconds per GiB way. (The default is 10 seconds per GiB.)
-#
-# There are a total of 22 constant-run-time stressors and 6 variable-
-# run-time stressors. Given the defaults, this works out to a total
-# expected default run time of 8400 seconds (145 minutes) plus 60 seconds
-# per GiB of RAM -- so a system with 16 GiB should take 156 minutes; one
-# with 32 GiB should take 172 minutes, and so on, using the default
-# values.
-
-
-get_params() {
- base_time=300
- time_per_gig=10
- while [ $# -gt 0 ] ; do
- case $1 in
- --base-time) base_time="$2"
- shift
- ;;
- --time-per-gig) time_per_gig="$2"
- shift
- ;;
- *) echo "Usage: $0 [ --base-time <time> ] [ --time-per-gig <time> ]"
- exit 1
- ;;
- esac
- shift
- done
- local extra_time=$(($time_per_gig * $total_mem_in_GiB))
- variable_time=$(($base_time + $extra_time ))
-} # get_params()
-
-
-# Run an individual stressor
-# Input:
-# $1 = stressor name (e.g., malloc, brk)
-# $2 = run time
-# Output:
-# had_error -- sets to "1" if an error occurred
-run_stressor() {
- local runtime="$2"
- # Double runtime; will forcefully terminate if stress-ng
- # fails to return in that time.
- end_time=$((runtime*2))
- echo "Running stress-ng $1 stressor for $2 seconds...."
- logger -t "memory_stress_ng" "Running stress-ng $1 stressor for $2 seconds..."
- # Use "timeout" command to launch stress-ng, to catch it should it go into la-la land
- timeout -s 14 $end_time stress-ng -k --aggressive --verify --timeout $runtime --$1 0
- return_code="$?"
- echo "return_code is $return_code"
- if [ "$return_code" != "0" ] ; then
- # 
- # a small grace period to allow stressors to terminate 
- # 
- sleep 10 
- # 
- # still running? aggressively kill all stressors 
- # 
- pids=$(pidof stress-ng)
- if [ -n "$pids" ]; then
- kill -9 $pids
- sleep 1
- kill -9 $pids
- pids=$(pidof stress-ng)
- if [ -n "$pids" ]; then
- echo "Note: stress-ng (PIDS $pids) could not be killed"
- fi
- fi 
- had_error=1
- echo "*****************************************************************"
- if [ $return_code = "124" ] ; then
- echo "** stress-ng $stressor timed out and was forcefully "
- "terminated! (Error $return_code)"
- elif [ $return_code = "137" ] ; then
- echo "** stress-ng memory test timed out and SIGKILL was used to " \
- "terminate the test case! (Error $return_code)"
- else
- echo "** Error $return_code reported on stressor $stressor!)"
- fi
- echo "*****************************************************************"
- had_error=1
- result=$return_code
- fi
-} # run_stressor()
-
-
-#
-# Main program body....
-#
-
-swap_space=`cat /proc/meminfo | grep -i SwapTotal | tr -s " " | cut -f 2 -d " "`
-if [ -z $swap_space ] || [ $swap_space = "0" ] ; then
- echo "Swap space unavailable! Please activate swap space and re-run this test!"
- exit 1
-fi
-
-# Total memory in KiB....
-total_mem_in_KiB=`cat /proc/meminfo | grep MemTotal | tr -s " " | cut -f 2 -d " "`
-total_mem_in_GiB=$((($total_mem_in_KiB/1048576)+1))
-echo "Total memory is $total_mem_in_GiB GiB"
-
-get_params "$@"
-echo "Constant run time is $base_time seconds per stressor"
-echo "Variable run time is $variable_time seconds per stressor"
-
-had_error=0
-
-command -v numactl >/dev/null 2>&1
-if [ $? == 0 ] ; then
- numa_nodes=$(numactl --hardware | grep available | head -n 1 | cut -f 2 -d " ")
-else
- numa_nodes=1
-fi
-
-# NOTE: Specify stressors in two arrays rather than rely on stress-ng's
-# --class memory,vm option for two reasons:
-# 1. We want to run some stressors (those that exhaust all memory)
-# for longer than others, so we need to specify different run
-# times for different stressors.
-# 2. stress-ng is constantly being updated with new tests. We don't
-# want to run one set of tests on SUT 1 and a larger set of tests
-# on SUT 2 if we happen to have updated stress-ng for some unrelated
-# reason (like a bug fix); thus, we specify tests individually.
-
-# Constant-run-time stressors -- run them for the same length of time on all
-# systems....
-crt_stressors=("bsearch" "context" "hsearch" "lsearch" "matrix" \
- "memcpy" "null" "pipe" "qsort" "stack" "str" "stream" \
- "tsearch" "vm-rw" "wcs" "zero" "mlock" "mmapfork" "mmapmany" \
- "mremap" "shm-sysv" "vm-splice")
-if [ "$numa_nodes" -gt 1 ]; then
- crt_stressors+=("numa")
-fi
-crt_runtime=$((${#crt_stressors[@]}*$base_time))
-
-# Variable-run-time stressors -- run them longer on systems with more RAM....
-vrt_stressors=("malloc" "mincore" "vm" "bigheap" "brk" "mmap")
-vrt_runtime=$((${#vrt_stressors[@]}*$variable_time))
-
-total_runtime=$((($crt_runtime + $vrt_runtime) / 60))
-echo "Estimated total run time is $total_runtime minutes"
-echo ""
-
-for stressor in ${crt_stressors[@]}; do
- run_stressor $stressor $base_time
-done
-
-for stressor in ${vrt_stressors[@]}; do
- run_stressor $stressor $variable_time
-done
-
-echo "*******************************************************************"
-if [ $had_error = "0" ] ; then
- echo "** stress-ng memory test passed!"
-else
- echo "** stress-ng memory test failed; most recent error was $result"
-fi
-echo "*******************************************************************"
-exit $result
diff --git a/bin/stress_ng_test b/bin/stress_ng_test
new file mode 100755
index 0000000..ced8455
--- /dev/null
+++ b/bin/stress_ng_test
@@ -0,0 +1,594 @@
+#!/usr/bin/env python3
+"""
+Copyright (C) 2020 Canonical Ltd.
+
+Authors
+ Rod Smith <rod.smith@canonical.com>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License version 3,
+as published by the Free Software Foundation.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+The purpose of this script is to run CPU, memory, and disk stress
+tests using the stress-ng binary program. It replaces the older
+cpu_stress, memory_stress_ng, and disk_stress_ng Bash scripts.
+"""
+
+
+from argparse import (
+ ArgumentParser,
+ RawTextHelpFormatter
+)
+from subprocess import (
+ CalledProcessError,
+ PIPE,
+ Popen,
+ STDOUT,
+ TimeoutExpired
+)
+import os
+import psutil
+import shlex
+import shutil
+import signal
+import stat
+import sys
+import time
+import uuid
+
+# 10GiB (smallest acceptable size for disk tests):
+min_fs_size = 10 * 1024 * 1024 * 1024
+# Swap filename
+my_swap = None
+
+
+class stress_ng():
+    """Interfaces with the external stress-ng binary -- accepts
+    test parameters, runs the test, and enables access to test
+    results."""
+
+    def __init__(self,
+                 stressors=['str'],
+                 wrapper_timeout=25,
+                 sng_timeout=20,
+                 test_dir="/tmp",
+                 extra_options=""):
+        self.stressors = list(stressors)  # never alias the default list
+        self.wrapper_timeout = wrapper_timeout
+        self.sng_timeout = sng_timeout
+        self.test_dir = test_dir
+        self.extra_options = extra_options
+        self.results = ""
+        self.returncode = 0
+
+    def run(self):
+        """Run stress-ng; keep its output in self.results, return its code."""
+        stressor_list = "--" + " 0 --".join(self.stressors)
+        command = "stress-ng --aggressive --verify --timeout {} {} {} 0". \
+            format(int(self.sng_timeout), self.extra_options, stressor_list)
+        time_str = time.strftime("%d %b %H:%M", time.gmtime())
+        if len(self.stressors) == 1:
+            print("{}: Running stress-ng {} stressor for {:.0f} seconds...".
+                  format(time_str, self.stressors[0], self.sng_timeout))
+        else:
+            print("{}: Running multiple stress-ng ".format(time_str) +
+                  "stressors in parallel for {:.0f}".format(self.sng_timeout))
+            print("seconds...")
+        try:
+            run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
+            local_results = run.communicate(timeout=self.wrapper_timeout)[0]
+            self.results = local_results.decode("utf-8", errors="ignore")
+            self.returncode = run.returncode
+            if self.returncode != 0:
+                print("stress_ng exited with code {}".format(self.returncode))
+        except CalledProcessError as err:
+            print("stress_ng exited with code {}".format(err.returncode))
+            self.results = err.stdout
+            self.returncode = err.returncode
+        except TimeoutExpired:
+            print("stress_ng timed out!")
+            # SIGINT first so stress-ng stops its workers; then SIGKILL. Reap it.
+            run.send_signal(signal.SIGINT)
+            try:
+                run.communicate(timeout=10)
+            except TimeoutExpired:
+                run.kill()
+                run.communicate()
+            self.results = ""
+            self.returncode = 124  # same code the old "timeout" wrapper gave
+        except KeyboardInterrupt:
+            self.results = ""
+            self.returncode = 125  # interrupted from the keyboard
+        return self.returncode
+
+    def get_results(self):
+        return self.results
+
+    def get_returncode(self):
+        return self.returncode
+
+
+"""Define CPU-related functions..."""
+
+
+def stress_cpu(args):
+ """Run stress-ng tests on CPUs."""
+
+ retval = 0
+ stressors = ['bsearch', 'context', 'cpu', 'crypt', 'hsearch', 'longjmp',
+ 'lsearch', 'matrix', 'qsort', 'str', 'stream', 'tsearch',
+ 'vecmath', 'wcs']
+ # Add 10% to runtime; will forcefully terminate if stress-ng
+ # fails to return in that time.
+ end_time = args.base_time * 11 / 10
+ print("Estimated total run time is {:.0f} minutes\n".
+ format(args.base_time/60))
+
+ test_object = stress_ng(stressors=stressors,
+ sng_timeout=args.base_time,
+ wrapper_timeout=end_time,
+ extra_options="--metrics-brief --tz --times")
+ retval = test_object.run()
+ print(test_object.get_results())
+ return retval
+
+
+"""Define memory-related functions..."""
+
+
+def num_numa_nodes():
+    """Return the number of NUMA nodes supported by the CPU."""
+
+    if shutil.which("numactl") is None:
+        return 1
+    else:
+        command = "numactl --hardware"
+        numactl = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
+        local_results = numactl.communicate()[0].split()
+        # local_results[1] will sometimes hold the number of NUMA nodes;
+        # but "numactl --hardware" sometimes returns the error message
+        # "No NUMA available on this system" (ValueError), or may print
+        # fewer than two words (IndexError); assume one NUMA node then....
+        try:
+            num_nodes = int(local_results[1])
+        except (ValueError, IndexError):
+            num_nodes = 1
+        return num_nodes
+
+
+def swap_space_ok(args):
+    """Check available swap space. If too small, add more. The minimum
+    acceptable amount is defined as the GREATER of the amount specified
+    by the command-line -s/--swap-space option OR the amount specified
+    by the STRESS_NG_MIN_SWAP_SIZE environment variable. Both values are
+    specified in gibibytes (GiB). If neither is specified, a value of 0
+    (no swap required) is assumed.
+    Side effect: the path of any swap file created here is stored in the
+    module-level my_swap (reset to None if the file could not be written).
+    Returns:
+    - True if OK (already or after adding more)
+    - False if insufficient swap space"""
+
+    global my_swap
+    gib = 1024 * 1024 * 1024
+    # Pick the larger request while both are still in GiB; comparing the
+    # GiB option against a byte count let a non-zero environment value
+    # always win, even when the command line asked for more.
+    # NOTE(review): assumes args.swap_size is a number of GiB -- confirm
+    # against the argument parser, which is outside this view.
+    min_swap_gib = max(int(os.environ.get('STRESS_NG_MIN_SWAP_SIZE', 0)),
+                       args.swap_size)
+    min_swap_space = min_swap_gib * gib
+    print("Minimum swap space is set to {:.0f} GiB".
+          format(min_swap_space / gib))
+    if psutil.swap_memory().total >= min_swap_space:
+        return True
+
+    print("Swap space too small! Attempting to add more (this may take " +
+          "a while)....")
+    my_swap = "/swap-{}.img".format(uuid.uuid1())
+    # Create swap file 10KiB bigger than minimum because there's a 4KiB
+    # overhead in the file, so if it were exactly the minimum, it would
+    # still be too small....
+    try:
+        with open(my_swap, "w+b") as f:
+            # Swap file zeroed out and increased in size in 1KiB chunks to
+            # avoid problems with sparse files and creating temporary RAM
+            # use that potentially exceeds available RAM....
+            for i in range(int((min_swap_space + 10240) / 1024)):
+                f.write(b"\x00" * 1024)
+    except OSError:
+        print("Unable to create temporary swap file! Aborting test!")
+        # open() itself may have failed, so there may be nothing to delete.
+        if os.path.exists(my_swap):
+            os.remove(my_swap)
+        my_swap = None
+        return False
+
+    os.chmod(my_swap, stat.S_IRUSR | stat.S_IWUSR)
+    cmd = "mkswap {}".format(my_swap)
+    Popen(shlex.split(cmd), stderr=STDOUT, stdout=PIPE).communicate()[0]
+    cmd = "swapon {}".format(my_swap)
+    Popen(shlex.split(cmd), stderr=STDOUT, stdout=PIPE).communicate()[0]
+    # mkswap/swapon results are not inspected; measuring swap again is what
+    # decides whether the test may proceed.
+    return psutil.swap_memory().total >= min_swap_space
+
+
+def stress_memory(args):
+    """Run stress-ng tests on memory.
+    Returns 130 if swap is insufficient, else the OR of all exit codes."""
+    retval = 0
+    if not swap_space_ok(args):
+        return 130
+
+    total_mem_in_gb = psutil.virtual_memory().total / 1073741824
+    vrt = args.base_time + total_mem_in_gb * args.time_per_gig
+    numa_nodes = num_numa_nodes()  # runs numactl, so ask only once
+    print("Total memory is {:.1f} GiB".format(total_mem_in_gb))
+    print("Constant run time is {} seconds per stressor".
+          format(args.base_time))
+    print("Variable run time is {:.0f} seconds per stressor".format(vrt))
+    print("Number of NUMA nodes is {}".format(numa_nodes))
+
+    # Constant-run-time stressors -- run them for the same length of time on
+    # all systems....
+    crt_stressors = ['bsearch', 'context', 'hsearch', 'lsearch', 'matrix',
+                     'memcpy', 'null', 'pipe', 'qsort', 'stack', 'str',
+                     'stream', 'tsearch', 'vm-rw', 'wcs', 'zero', 'mlock',
+                     'mmapfork', 'mmapmany', 'mremap', 'shm-sysv',
+                     'vm-splice']
+    if numa_nodes > 1:
+        crt_stressors.append('numa')
+
+    # Variable-run-time stressors -- run longer on systems with more RAM....
+    vrt_stressors = ['malloc', 'mincore', 'vm', 'bigheap', 'brk', 'mmap']
+
+    est_runtime = len(crt_stressors) * args.base_time + \
+        len(vrt_stressors) * vrt
+    print("Estimated total run time is {:.0f} minutes\n".
+          format(est_runtime/60))
+    try:
+        for group, runtime in ((crt_stressors, args.base_time),
+                               (vrt_stressors, vrt)):
+            for stressor in group:
+                test_object = stress_ng(stressors=stressor.split(),
+                                        sng_timeout=runtime,
+                                        wrapper_timeout=runtime*2)
+                retval = retval | test_object.run()
+                print(test_object.get_results())
+    finally:
+        # Runs even if a test raises, so the swap file is never left behind.
+        if my_swap is not None and args.keep_swap is False:
+            print("Deleting temporary swap file....")
+            cmd = "swapoff {}".format(my_swap)
+            Popen(shlex.split(cmd), stderr=STDOUT, stdout=PIPE).communicate()
+            os.remove(my_swap)
+    return retval
+
+
+"""Define disk-related functions..."""
+
+
+def get_partition_data(file):
+    """Get partition details (size & type) on /dev/{file} & return in
+    dictionary."""
+
+    part_data = {'name': file, 'fs_type': ""}
+
+    # Get size of device, in bytes....
+    command = "blockdev --getsize64 /dev/{}".format(file)
+    run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
+    part_data['size'] = int(run.communicate()[0])
+
+    # Get filesystem type. "blkid -o export" prints one KEY=value per line;
+    # match the TYPE key exactly so that PTTYPE, SEC_TYPE or a label that
+    # merely contains "TYPE" cannot overwrite the filesystem type.
+    command = "blkid /dev/{} -o export".format(file)
+    run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
+    local_results = run.communicate()[0].splitlines()
+    for result in local_results:
+        result_str = result.decode(encoding="utf-8", errors="ignore").strip()
+        if result_str.startswith("TYPE="):
+            part_data['fs_type'] = result_str.split("=", 1)[1]
+    return part_data
+
+
+def find_mount_point(file):
+    """Find the mount point of /dev/{file}.
+    Returns:
+    * None if unmounted (or if df fails or prints nothing)
+    * The mount point (as a string, spaces preserved) if it's mounted."""
+
+    command = "df /dev/{} --output=target".format(file)
+    run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
+    output = run.communicate()[0].decode(encoding="utf-8", errors="ignore")
+    lines = output.strip().splitlines()
+    # stderr is merged into stdout, so trust the text only if df succeeded.
+    if run.returncode != 0 or not lines:
+        return None
+    # If df is fed a non-mounted-partition, it returns "/dev" as the
+    # mount point, so ignore that....
+    target = lines[-1].strip()
+    return None if target == "/dev" else target
+
+
+class disk():
+    """Interfaces to disk device, to check device status, find largest
+    partition, mount it, etc.
+
+    Attributes:
+    * device -- Device filename, as passed to the constructor.
+    * all_parts -- List of dictionaries from get_partition_data(), each
+      with an added 'part_type' key ("partition" or "lv").
+    * largest_part -- None until find_largest_partition() is called.
+    * unsupported_fs -- See find_largest_partition().
+    * test_dir -- Directory for stress-ng's temporary files; "/tmp" until
+      mount_filesystem() creates one on the partition being tested.
+
+    NOTE(review): this class was reconstructed from a patch listing whose
+    indentation had been collapsed; confirm the nesting against the real
+    file."""
+
+    def __init__(self, device=""):
+        """Collect data on the partitions (and, if LVM is in use, the
+        logical volumes) associated with device."""
+        self.device = device
+        self.all_parts = []
+        self.largest_part = None
+        self.unsupported_fs = None
+        self.test_dir = "/tmp"
+        lvm_detected = False
+        # Find final element of device name; for instance "sda" for "/dev/sda"
+        stripped_devname = self.device.split("/")[-1]
+
+        # Do first pass to collect data on partitions & software RAID
+        # devices (which we treat like partitions)....
+        for file in os.listdir("/sys/class/block"):
+            if self._is_on_device(stripped_devname, file):
+                part_data = get_partition_data(file)
+                part_data['part_type'] = "partition"
+                if part_data['fs_type'] == "LVM2_member":
+                    lvm_detected = True
+                self.all_parts.append(part_data)
+
+        # Do another pass to collect data on logical volumes, if any exist
+        # on the target device....
+        # NOTE: This code ignores where an LVM exists; it could span multiple
+        # disks, or be on one other than the one being tested. Canonical
+        # certification specifies use of partitions, not LVMs, so this code
+        # exists mainly for software development using development systems,
+        # not on servers actually being tested.
+        if lvm_detected:
+            for file in os.listdir("/sys/class/block/"):
+                if "dm-" in file:
+                    part_data = get_partition_data(file)
+                    part_data['part_type'] = "lv"
+                    self.all_parts.append(part_data)
+
+    @staticmethod
+    def _is_on_device(devname, entry):
+        """Return True if entry names devname itself or one of its
+        partitions.
+
+        Partition names are the disk name plus the partition number, with
+        a "p" in between when the disk name ends in a digit ("sda1", but
+        "nvme0n1p1"). Matching on that pattern, rather than on a substring,
+        keeps "sda" from claiming "sdaa1" and "nvme0n1" from claiming
+        "nvme0n10"; an empty devname matches nothing."""
+        if not devname:
+            return False
+        if entry == devname:
+            return True
+        prefix = devname + "p" if devname[-1].isdigit() else devname
+        return entry.startswith(prefix) and entry[len(prefix):].isdigit()
+
+    def is_block_device(self):
+        """Return True if self.device exists and is a block device;
+        otherwise print the reason and return False."""
+        try:
+            mode = os.stat(self.device).st_mode
+            if not stat.S_ISBLK(mode):
+                print("{} is NOT a block device! Aborting!".
+                      format(self.device))
+                return False
+        except FileNotFoundError:
+            print("{} does not exist! Aborting!".format(self.device))
+            return False
+        return True
+
+    def find_largest_partition(self):
+        """Find the largest partition that holds a supported filesystem on
+        self.device. Sets:
+        self.largest_part -- Dictionary containing information on largest
+                             partition
+        self.unsupported_fs -- Empty or contains information about largest
+                               unsupported filesystem (of certain known types)
+                               found on disk
+        Returns self.largest_part; its 'name' is "" if nothing suitable was
+        found."""
+
+        self.largest_part = {'name': "",
+                             'size': 0,
+                             'part_type': "lv",
+                             'fs_type': ""}
+        self.unsupported_fs = None
+
+        # A filesystem can be supported for the test; unsupported but worth
+        # noting in an error message; or unsupported and not worth noting.
+        # The first two categories are enumerated in lists....
+        supported_filesystems = ['ext2', 'ext3', 'ext4', 'xfs', 'jfs', 'btrfs']
+        unsupported_filesystems = ['ntfs', 'vfat', 'hfs', 'LVM2_member']
+
+        for part in self.all_parts:
+            new_sz = int(part['size'])
+            old_sz = int(self.largest_part['size'])
+            new_lv = part['part_type'] == "lv"
+            old_lv = self.largest_part['part_type'] == "lv"
+            # A candidate replaces the current choice if it is the first
+            # non-empty one, is the first to exceed min_fs_size, is a
+            # bigger big-enough one than a logical volume chosen so far,
+            # or is a bigger non-LV device.
+            if (new_sz > 0 and old_sz == 0) or \
+               (new_sz > min_fs_size and old_sz < min_fs_size) or \
+               (new_sz > min_fs_size and new_sz > old_sz and old_lv) or \
+               (new_sz > old_sz and not new_lv):
+                if part['fs_type'] in supported_filesystems:
+                    self.largest_part = part
+                elif part['fs_type'] in unsupported_filesystems:
+                    # Make note of it if it might be an old filesystem
+                    # that was not properly re-allocated....
+                    self.unsupported_fs = part
+        return self.largest_part
+
+    def mount_filesystem(self, simulate):
+        """Pick the partition to test, mount it if necessary, and create
+        a test directory on it (stored in self.test_dir).
+
+        Arguments:
+        * simulate -- If true, only report what would be done; nothing is
+          mounted or created and self.test_dir is left unchanged.
+
+        Returns True if the test can proceed (always, when simulating and
+        a big-enough partition exists); False if no suitable partition
+        was found, it is smaller than min_fs_size, or mounting failed."""
+        print("Disk device is {}".format(self.device))
+        target_part = self.find_largest_partition()
+        if target_part['name'] == "":
+            if self.unsupported_fs is not None:
+                print("A filesystem of type {} was found, but is not "
+                      "supported by this test.".
+                      format(self.unsupported_fs['fs_type']))
+                print("A Linux-native filesystem (ext2/3/4fs, XFS, JFS, or "
+                      "Btrfs) is required.")
+            else:
+                print("No suitable partition found!")
+            return False
+
+        if target_part['size'] < min_fs_size:
+            print("Warning: {} is less than {:.0f} GiB in size!".
+                  format(target_part['name'], min_fs_size/1024/1024/1024))
+            print("Disk is too small to test. Aborting test!")
+            return False
+
+        full_device = "/dev/{}".format(target_part['name'])
+        print("Testing partition {}".format(full_device))
+        mount_point = find_mount_point(target_part['name'])
+        default_mount_point = "/mnt/{}".format(target_part['name'])
+        if simulate:
+            # Name the directory that a real run would use, instead of
+            # printing "None" for a partition that is not mounted yet.
+            print("Run with --simulate, so not mounting filesystems.")
+            print("If run without --simulate, would mount {} to {}".
+                  format(full_device, mount_point or default_mount_point))
+            print("(if not already mounted).")
+            return True
+
+        if not mount_point:
+            mount_point = default_mount_point
+            print("Trying to mount {} to {}...".
+                  format(full_device, mount_point))
+            os.makedirs(mount_point, exist_ok=True)
+            command = "mount {} {}".format(full_device, mount_point)
+            run = Popen(shlex.split(command), stderr=STDOUT, stdout=PIPE)
+            output = run.communicate()[0].decode(encoding="utf-8",
+                                                 errors="ignore")
+            print(output)
+            if run.returncode != 0:
+                # Without this check the test directory would be created
+                # under the empty mount-point directory, and the stress
+                # run would exercise whatever disk holds /mnt instead of
+                # the one being tested.
+                print("Unable to mount {} to {}! Aborting test!".
+                      format(full_device, mount_point))
+                return False
+        else:
+            print("{} is already mounted at {}".
+                  format(full_device, mount_point))
+        self.test_dir = "{}/tmp/stress-ng-{}".format(mount_point,
+                                                     uuid.uuid1())
+        os.makedirs(self.test_dir, exist_ok=True)
+        return True
+
+
+def stress_disk(args):
+    """Stress the disk named by args.device with stress-ng.
+
+    Reads args.device, args.simulate, and args.base_time. A non-empty
+    device name that does not contain "/dev" gets "/dev/" prepended
+    (args.device is updated in place).
+
+    Returns 131 if the device is not a block device, 132 if
+    mount_filesystem() reports failure, and otherwise the bitwise OR of
+    the values returned by each stressor's run (0 when simulating).
+
+    NOTE(review): nesting was reconstructed from a patch listing whose
+    indentation had been collapsed; confirm against the real file."""
+
+    disk_stressors = ['aio', 'aiol', 'chdir', 'chmod', 'chown', 'dentry',
+                      'dir', 'fallocate', 'fiemap', 'filename', 'flock',
+                      'fstat', 'hdd', 'ioprio', 'lease', 'locka', 'lockf',
+                      'lockofd', 'madvise', 'mknod', 'msync', 'readahead',
+                      'seal', 'seek', 'sync-file', 'xattr']
+
+    if args.device != "" and "/dev" not in args.device:
+        args.device = "/dev/" + args.device
+
+    test_disk = disk(args.device)
+    if not test_disk.is_block_device():
+        return 131
+    if not test_disk.mount_filesystem(args.simulate):
+        return 132
+
+    # Every stressor runs for base_time seconds, one after another.
+    est_runtime = len(disk_stressors) * args.base_time
+    print("Using test directory: '{}'".format(test_disk.test_dir))
+    print("Estimated total run time is {:.0f} minutes\n".
+          format(est_runtime/60))
+
+    status = 0
+    if args.simulate:
+        return status
+
+    # The options are the same for every stressor, so build them once.
+    disk_options = "--temp-path {} ".format(test_disk.test_dir) + \
+                   "--hdd-opts dsync --readahead-bytes 16M -k"
+    for stressor in disk_stressors:
+        test_object = stress_ng(stressors=stressor.split(),
+                                sng_timeout=args.base_time,
+                                wrapper_timeout=args.base_time*5,
+                                extra_options=disk_options)
+        status |= test_object.run()
+        print(test_object.get_results())
+
+    # Never remove the shared default directory.
+    if test_disk.test_dir != "/tmp":
+        shutil.rmtree(test_disk.test_dir, ignore_errors=True)
+    return status
+
+
+"""Main program body..."""
+
+
+def main():
+ """Run a stress_ng-based stress run.
+
+ Defines the "cpu", "memory", and "disk" sub-commands and their options,
+ parses the command line, and checks that stress-ng can be found and
+ that the effective user ID is 0. It then calls the function registered
+ for the chosen sub-command and prints a summary of the value it
+ returned.
+
+ Returns 128 if stress-ng is not found and 129 if not run as root.
+ Otherwise returns retval, which starts at 1 and is replaced by the
+ return value of the sub-command's function when one was selected."""
+
+ parser = ArgumentParser(
+ description="Run tests based on stress-ng",
+ formatter_class=RawTextHelpFormatter)
+ subparsers = parser.add_subparsers()
+
+ # Main cli options
+ cpu_parser = subparsers.add_parser('cpu', help=("Run CPU tests"))
+ memory_parser = subparsers.add_parser('memory', help=("Run memory tests"))
+ disk_parser = subparsers.add_parser('disk', help=("Run disk tests"))
+
+ # Sub test options
+ # action = test_parser.add_mutually_exclusive_group()
+
+ # CPU parameters
+ cpu_parser.add_argument("-b", "--base-time", type=int, default=7200,
+ help="Run time, in seconds (default=7200)")
+
+ # Memory parameters
+ memory_parser.add_argument("-b", "--base-time", type=int,
+ help="Base time for each test, in seconds " +
+ "(default=300)", default=300)
+ memory_parser.add_argument("-t", "--time-per-gig", type=int,
+ help="Extra time per GiB for some stressors " +
+ "(default=10)", default=10)
+ memory_parser.add_argument("-s", "--swap-size", type=int,
+ help="swap size in GiB", default=0)
+ memory_parser.add_argument("-k", "--keep-swap", action="store_true",
+ help="Keep swap file, if added by test")
+
+ # Disk parameters
+ disk_parser.add_argument("-d", "--device", type=str, required=True,
+ help="Disk device (/dev/sda, etc.)")
+ disk_parser.add_argument("-b", "--base-time", type=int,
+ help="Time for each test, in seconds " +
+ "(default=240)", default=240)
+ disk_parser.add_argument("-s", "--simulate", action="store_true",
+ help="Report disk info, but don't run tests")
+
+ # Attach the handler for each sub-command; it is read back below as
+ # args.func.
+ cpu_parser.set_defaults(func=stress_cpu)
+ memory_parser.set_defaults(func=stress_memory)
+ disk_parser.set_defaults(func=stress_disk)
+
+ args = parser.parse_args()
+
+ # logging.basicConfig(level=logging.INFO)
+
+ # Preconditions shared by every sub-command; each has its own exit code.
+ if shutil.which("stress-ng") is None:
+ print("The stress-ng utility is not installed; exiting!")
+ return(128)
+ if not os.geteuid() == 0:
+ print("This program must be run as root (or via sudo); exiting!")
+ return(129)
+
+ # 'func' is only set through the set_defaults() calls above, so its
+ # absence is treated as "no sub-command given" and help is printed.
+ # NOTE(review): indentation was lost in this listing of the patch, so it
+ # cannot be told from here whether the result banner below runs only in
+ # the else branch or in both cases -- confirm against the real file.
+ retval = 1
+ if 'func' not in args:
+ parser.print_help()
+ else:
+ retval = args.func(args)
+ print("**************************************************************")
+ if retval == 0:
+ print("* stress-ng test passed!")
+ elif retval == 124: # Terminated by Python timeout
+ print("** stress-ng test timed out and was forcefully ")
+ print(" terminated (Error {})".format(retval))
+ elif retval == 125: # Terminated by SIGINT
+ print("** stress-ng test timed out and SIGINT (Ctrl+C) " +
+ "was used to terminate")
+ print(" the test (Error {})!".format(retval))
+ elif retval == 130: # Insufficient swap space for memory test
+ print("** Swap space unavailable! Please activate swap space " +
+ "and re-run this test!")
+ print(" (Error {})".format(retval))
+ elif retval == 131: # Alleged disk device is not a device file
+ print("** {} is not a block device! Aborting!".format(args.device))
+ print(" (Error {})".format(retval))
+ elif retval == 132: # Unable to find a partition for disk test
+ print("** Unable to find a suitable partition! Aborting!")
+ print(" (Error {})".format(retval))
+ elif retval == 137: # Terminated by SIGKILL
+ print("** stress-ng test timed out and SIGKILL was used to ")
+ print(" terminate the test (Error {})!".format(retval))
+ else:
+ print("stress-ng test failed with return code: {}".format(retval))
+ print("**************************************************************")
+
+ return(retval)
+
+
+# Run main() only when this file is executed as a script, and hand its
+# return value to sys.exit() as the process exit status.
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/units/disk/jobs.pxu b/units/disk/jobs.pxu
index 3e0f568..431cb2e 100644
--- a/units/disk/jobs.pxu
+++ b/units/disk/jobs.pxu
@@ -94,10 +94,10 @@ command:
 if [ -n "$STRESS_NG_DISK_TIME" ]
 then
 echo "Found STRESS_NG_DISK_TIME env var, stress_ng disk running time is now: $STRESS_NG_DISK_TIME seconds"
- disk_stress_ng {name} --base-time $STRESS_NG_DISK_TIME --really-run
+ stress_ng_test disk --device {name} --base-time $STRESS_NG_DISK_TIME
 else
 echo "STRESS_NG_DISK_TIME env var is not found, stress_ng disk running time is default value"
- disk_stress_ng {name} --base-time 240 --really-run
+ stress_ng_test disk --device {name} --base-time 240
 fi
 
 unit: template
diff --git a/units/memory/jobs.pxu b/units/memory/jobs.pxu
index 48d543c..5fad4f4 100644
--- a/units/memory/jobs.pxu
+++ b/units/memory/jobs.pxu
@@ -44,9 +44,10 @@ category_id: com.canonical.plainbox::memory
 id: memory/memory_stress_ng
 estimated_duration: 11000.0
 user: root
+environ: STRESS_NG_MIN_SWAP_SIZE
 requires:
 package.name == 'stress-ng' or executable.name == 'stress-ng'
-command: memory_stress_ng
+command: stress_ng_test memory
 _summary: Stress test of system memory
 _description:
 Test to perform some basic stress and exercise of system memory via the
diff --git a/units/stress/jobs.pxu b/units/stress/jobs.pxu
index cf44c6c..0a1ce27 100644
--- a/units/stress/jobs.pxu
+++ b/units/stress/jobs.pxu
@@ -22,10 +22,10 @@ command:
 if [ -n "$STRESS_NG_CPU_TIME" ]
 then
 echo "Found STRESS_NG_CPU_TIME env var, stress_ng cpu running time is now: $STRESS_NG_CPU_TIME seconds"
- cpu_stress --runtime $STRESS_NG_CPU_TIME
+ stress_ng_test cpu --base-time $STRESS_NG_CPU_TIME
 else
 echo STRESS_NG_CPU_TIME env var is not found, stress_ng cpu running time is default value
- cpu_stress --runtime 7200
+ stress_ng_test cpu --base-time 7200
 fi
 _summary:
 Stress of CPUs (very long runtime)
author	PMR <pmr@pmr-lander>	2020-02-25 16:35:50 +0000
committer	PMR <pmr@pmr-lander>	2020-02-25 16:35:50 +0000
commit	0c06eadd115b2cf015b8edcb756fe9236c151753 (patch)
tree	991c7b64a43a8803434358237fa40ca4f09d80c8
parent	b5986748a745f0c41231739d82e31b5fea03b885 (diff)
parent	7f54a248b998a84b58cf26f4e53652766ff10f71 (diff)