diff options
author | Maciej Kisielewski <maciej.kisielewski@canonical.com> | 2017-05-18 14:15:29 +0200 |
---|---|---|
committer | Maciej Kisielewski <maciej.kisielewski@canonical.com> | 2017-05-18 14:15:29 +0200 |
commit | 384d44b674450bab72f93b32ac949edeb51fde63 (patch) | |
tree | 29408971a51e229b22e5f66501c6abcd0a0b5b5f | |
parent | 141ea08a0f521e84cc59c09df67631c2f36c3309 (diff) |
make cpu_offlining test work on many-cpu systems

Fixes LP: #1682328
Signed-off-by: Maciej Kisielewski <maciej.kisielewski@canonical.com>
-rwxr-xr-x | bin/cpu_offlining | 124 |
1 files changed, 72 insertions, 52 deletions
#!/usr/bin/env python3
"""Take CPUs offline through sysfs, bring them back, and report failures.

For every hot-pluggable CPU that is not reserved for interrupt handling the
script writes ``0`` and then ``1`` to the CPU's sysfs ``online`` file.  After
each write it checks whether the CPU appears in the header line of
``/proc/interrupts``.  The process exit status is 0 when every CPU went
offline and came back, and 1 otherwise.
"""

from glob import glob
from math import ceil
from os.path import basename, isfile
from time import sleep
import sys

CPU_GLOB = '/sys/devices/system/cpu/cpu[0-9]*'
ONLINE_FILE = '/sys/devices/system/cpu/{}/online'
INTERRUPTS_PATH = '/proc/interrupts'

# There is an arch limit on how many interrupts one cpu can handle;
# according to LP: #1682328 it's 224.
MAX_INTS_PER_CPU = 224

# Seconds to wait after writing to an `online` file before checking the
# result (same delay the original shell implementation used).
SETTLE_TIME = 0.5


def _write_online_flag(cpu_name, flag):
    """Write *flag* ('0' or '1') to the sysfs `online` file of *cpu_name*."""
    with open(ONLINE_FILE.format(cpu_name), 'wt') as f:
        f.write('{}\n'.format(flag))


def offline_cpu(cpu_name):
    """Request that *cpu_name* (e.g. 'cpu3') be taken offline.

    Raises OSError if the sysfs file cannot be opened or written.
    """
    _write_online_flag(cpu_name, '0')


def online_cpu(cpu_name):
    """Request that *cpu_name* (e.g. 'cpu3') be brought online.

    Raises OSError if the sysfs file cannot be opened or written.
    """
    _write_online_flag(cpu_name, '1')


def cpu_listed_in_header(cpu_name, header):
    """Return True if *cpu_name* is one of the columns named in *header*.

    The comparison is case-insensitive and matches whole words only, so
    'cpu1' is not found in a header that lists only 'CPU10'.
    """
    return cpu_name.lower() in header.lower().split()


def is_cpu_online(cpu_name):
    """Return True if *cpu_name* is mentioned in /proc/interrupts.

    Uses the same heuristic as the original `cpu_offlining` test: a CPU is
    considered online when it has a column in the /proc/interrupts header.
    """
    with open(INTERRUPTS_PATH, 'rt') as f:
        header = f.readline()
    return cpu_listed_in_header(cpu_name, header)


def reserved_cpus_needed(interrupts_count, max_ints_per_cpu=MAX_INTS_PER_CPU):
    """Return how many CPUs must stay online to service the interrupts.

    One CPU is reserved per *max_ints_per_cpu* interrupts (rounded up), and
    at least one CPU is always reserved, even for a count of zero or less.
    """
    return max(1, ceil(interrupts_count / max_ints_per_cpu))


def _switch_cpus(cpus, switch, action, succeeded):
    """Apply *switch* to every CPU in *cpus* and return the ones that failed.

    *switch* is `offline_cpu` or `online_cpu`, *action* is the verb used in
    the error message, and *succeeded* takes the current online state of the
    CPU and returns True when that state is the expected one.
    """
    failed = []
    for cpu in cpus:
        try:
            switch(cpu)
        except OSError as exc:
            # A refused write is a test failure, not a reason to abort and
            # leave the remaining CPUs in whatever state they are in.
            print("ERROR: Failed to {} {}: {}".format(action, cpu, exc),
                  file=sys.stderr)
            failed.append(cpu)
            continue
        sleep(SETTLE_TIME)
        if not succeeded(is_cpu_online(cpu)):
            print("ERROR: Failed to {} {}".format(action, cpu),
                  file=sys.stderr)
            failed.append(cpu)
    return failed


def main():
    """Cycle all eligible CPUs off and on; return 0 on success, 1 otherwise."""
    cpus = [basename(x) for x in glob(CPU_GLOB)]
    # sort *numerically* cpus by their number, ignoring first 3 characters
    # so ['cpu1', 'cpu11', 'cpu2'] is sorted to ['cpu1', 'cpu2', 'cpu11']
    cpus.sort(key=lambda x: int(x[3:]))
    with open(INTERRUPTS_PATH, 'rt') as f:
        interrupts_count = len(f.readlines()) - 1  # first line is a header

    # Keep the lowest-numbered CPUs online so the interrupts still have
    # somewhere to go, and skip CPUs that expose no `online` control file.
    reserved_cpus_count = reserved_cpus_needed(interrupts_count)
    candidates = [cpu for cpu in cpus[reserved_cpus_count:]
                  if isfile(ONLINE_FILE.format(cpu))]

    failed_offlines = _switch_cpus(
        candidates, offline_cpu, 'offline', lambda online: not online)
    # Every candidate is brought back, including those that failed to go
    # offline, so the machine is left with all CPUs requested online.
    failed_onlines = _switch_cpus(
        candidates, online_cpu, 'online', lambda online: online)

    if not failed_offlines and not failed_onlines:
        print("Successfully turned {} cores off and back on".format(
            len(candidates)))
        return 0

    print("Error with offlining one or more cores. CPU offline may not "
          "work if this is an ARM system.", file=sys.stderr)
    print("Offline Failed: {}".format(' '.join(failed_offlines)),
          file=sys.stderr)
    print("Online Failed: {}".format(' '.join(failed_onlines)),
          file=sys.stderr)
    return 1


if __name__ == '__main__':
    # Propagate the result as the process exit status so the test harness
    # can see failures.
    sys.exit(main())