summary | refs | log | tree | commit | diff
diff options
author Maciej Kisielewski <maciej.kisielewski@canonical.com> 2017-05-18 14:15:29 +0200
committer Maciej Kisielewski <maciej.kisielewski@canonical.com> 2017-05-18 14:15:29 +0200
commit 384d44b674450bab72f93b32ac949edeb51fde63 (patch)
tree 29408971a51e229b22e5f66501c6abcd0a0b5b5f
parent 141ea08a0f521e84cc59c09df67631c2f36c3309 (diff)
make cpu_offlining test work on many-cpu systems
Fixes LP: #1682328
Signed-off-by: Maciej Kisielewski <maciej.kisielewski@canonical.com>
-rwxr-xr-x bin/cpu_offlining 124
1 files changed, 72 insertions, 52 deletions
diff --git a/bin/cpu_offlining b/bin/cpu_offlining
index 7f1095a..0a673d8 100755
--- a/bin/cpu_offlining
+++ b/bin/cpu_offlining
@@ -1,52 +1,72 @@
-#!/bin/bash
-# Take every CPU that exposes an "online" control file (except cpu0)
-# offline, then bring them all back online. Exits 0 when every step
-# succeeded and 1 otherwise.
-
-result=0
-cpu_count=0
-offline_fails="Offline Failed:"
-online_fails="Online Failed:"
-exitcode=0
-
-# Turn CPU cores off
-for cpu_num in `ls /sys/devices/system/cpu | grep -o cpu[0-9]*`; do
- # entries without an "online" file are skipped
- if [ -f /sys/devices/system/cpu/$cpu_num/online ]; then
- # cpu0 is never taken offline
- if [ "$cpu_num" != "cpu0" ]; then
- ((cpu_count++))
- echo 0 > /sys/devices/system/cpu/$cpu_num/online
- sleep 0.5
- # grep exits 0 when $cpu_num still appears (whole word, any case) in
- # /proc/interrupts; that is treated as a failed offline
- output=`grep -w -i $cpu_num /proc/interrupts`
- result=$?
- if [ $result -eq 0 ]; then
- echo "ERROR: Failed to offline $cpu_num" 1>&2
- offline_fails="$offline_fails $cpu_num"
- exitcode=1
- fi
- fi
- fi
-done
-
-# Back on again
-for cpu_num in `ls /sys/devices/system/cpu | grep -o cpu[0-9]*`; do
- if [ -f /sys/devices/system/cpu/$cpu_num/online ]; then
- if [ "$cpu_num" != "cpu0" ]; then
- echo 1 > /sys/devices/system/cpu/$cpu_num/online
- sleep 0.5
- # grep exits 1 when $cpu_num is absent from /proc/interrupts; that is
- # treated as a failed online
- output=`grep -w -i $cpu_num /proc/interrupts`
- result=$?
- if [ $result -eq 1 ]; then
- echo "ERROR: Failed to online $cpu_num" 1>&2
- online_fails="$online_fails $cpu_num"
- exitcode=1
- fi
- fi
- fi
-done
-
-# Report the outcome: the summary goes to stdout on success; on failure the
-# explanation and both failure lists go to stderr
-if [ $exitcode -eq 0 ]; then
- echo "Successfully turned $cpu_count cores off and back on"
-else
- echo "Error with offlining one or more cores. CPU offline may not work if this is an ARM system." 1>&2
- echo $offline_fails 1>&2
- echo $online_fails 1>&2
-fi
-
-exit $exitcode
+#!/usr/bin/env python3
+
+from glob import glob
+from os.path import basename
+from math import ceil
+from time import sleep
+import sys
+
+
+def offline_cpu(cpu_name):
+    """Take a CPU offline by writing '0' to its sysfs ``online`` file.
+
+    cpu_name is the CPU's directory name under /sys/devices/system/cpu,
+    e.g. 'cpu3'.
+    """
+    control_path = '/sys/devices/system/cpu/{}/online'.format(cpu_name)
+    with open(control_path, 'wt') as control:
+        control.write('0\n')
+
+
+def online_cpu(cpu_name):
+    """Bring a CPU online by writing '1' to its sysfs ``online`` file.
+
+    cpu_name is the CPU's directory name under /sys/devices/system/cpu,
+    e.g. 'cpu3'.
+    """
+    control_path = '/sys/devices/system/cpu/{}/online'.format(cpu_name)
+    with open(control_path, 'wt') as control:
+        control.write('1\n')
+
+
+def is_cpu_online(cpu_name):
+    """Return True if cpu_name is listed in the /proc/interrupts header.
+
+    This keeps the heuristic of the original shell version of the test,
+    which ran `grep -w -i` on /proc/interrupts: a CPU is considered online
+    when its name appears in the first (header) line of that file.
+
+    The name is compared against whole, lower-cased header words, so 'cpu1'
+    is not reported as online merely because 'CPU10' is still listed.
+    """
+    with open('/proc/interrupts', 'rt') as f:
+        header_columns = f.readline().lower().split()
+    return cpu_name.lower() in header_columns
+
+
+def main():
+    """Offline and re-online every testable CPU, reporting any failures.
+
+    The lowest-numbered CPUs are left alone so they can keep servicing
+    interrupts; every remaining CPU that exposes an ``online`` control file
+    is taken offline and then brought back.
+
+    Returns 0 when every tested CPU went offline and came back online,
+    1 otherwise.
+    """
+    # local import: only this function needs it
+    from os.path import isfile
+
+    # keep only entries named cpu<number>; the glob alone would also accept
+    # names with trailing non-digits, which int() below cannot parse
+    cpus = [basename(x) for x in glob('/sys/devices/system/cpu/cpu[0-9]*')
+            if basename(x)[3:].isdigit()]
+    # sort *numerically* cpus by their number, ignoring first 3 characters
+    # so ['cpu1', 'cpu11', 'cpu2'] is sorted to ['cpu1', 'cpu2', 'cpu11']
+    cpus.sort(key=lambda x: int(x[3:]))
+    with open('/proc/interrupts', 'rt') as f:
+        interrupts_count = sum(1 for _ in f) - 1  # first line is a header
+
+    # there is an arch limit on how many interrupts one cpu can handle
+    # according to LP: 1682328 it's 224. So we have to reserve some CPUs for
+    # handling them
+    max_ints_per_cpu = 224
+    # never let the reservation drop to zero: the lowest-numbered CPU must
+    # stay online, as it did in the shell version of this test
+    reserved_cpus_count = max(1, ceil(interrupts_count / max_ints_per_cpu))
+
+    # CPUs without an `online` file cannot be switched; skip them the same
+    # way the shell version of this test did
+    tested_cpus = [
+        cpu for cpu in cpus[reserved_cpus_count:]
+        if isfile('/sys/devices/system/cpu/{}/online'.format(cpu))]
+
+    failed_offlines = []
+
+    for cpu in tested_cpus:
+        try:
+            offline_cpu(cpu)
+        except OSError as exc:
+            # record the failure and carry on, so that the onlining pass
+            # below still runs for the CPUs already switched off
+            print("ERROR: Failed to offline {}: {}".format(cpu, exc),
+                  file=sys.stderr)
+            failed_offlines.append(cpu)
+            continue
+        sleep(0.5)
+        if is_cpu_online(cpu):
+            print("ERROR: Failed to offline {}".format(cpu), file=sys.stderr)
+            failed_offlines.append(cpu)
+
+    failed_onlines = []
+
+    for cpu in tested_cpus:
+        try:
+            online_cpu(cpu)
+        except OSError as exc:
+            print("ERROR: Failed to online {}: {}".format(cpu, exc),
+                  file=sys.stderr)
+            failed_onlines.append(cpu)
+            continue
+        sleep(0.5)
+        if not is_cpu_online(cpu):
+            print("ERROR: Failed to online {}".format(cpu), file=sys.stderr)
+            failed_onlines.append(cpu)
+
+    if not failed_offlines and not failed_onlines:
+        print("Successfully turned {} cores off and back on".format(
+            len(tested_cpus)))
+        return 0
+
+    # the whole failure report goes to stderr, with each list labelled so
+    # the two lines can be told apart even when one of them is empty
+    print("Error with offlining one or more cores. CPU offline may not "
+          "work if this is an ARM system.", file=sys.stderr)
+    print(' '.join(['Offline Failed:'] + failed_offlines), file=sys.stderr)
+    print(' '.join(['Online Failed:'] + failed_onlines), file=sys.stderr)
+    return 1
+
+if __name__ == '__main__':
+    # propagate main()'s status so that a failed run exits non-zero
+    sys.exit(main())