Skip to content
9 changes: 9 additions & 0 deletions lib/schema_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,15 @@ def check_usage_scenario(self, usage_scenario):
Optional("environment"): self.single_or_list(Or(dict,str)),
Optional("ports"): self.single_or_list(Or(str, int)),
Optional("depends_on"): Or([str],dict),
Optional("healthcheck"): {
Optional('test'): Or(list, str),
Optional('interval'): str,
Optional('timeout'): str,
Optional('retries'): int,
Optional('start_period'): str,
# Optional('start_interval'): str, docker CLI does not support this atm
Optional('disable'): bool,
},
Optional("setup-commands"): [str],
Optional("volumes"): self.single_or_list(str),
Optional("folder-destination"):str,
Expand Down
2 changes: 1 addition & 1 deletion metric_providers/psu/energy/ac/mcp/machine/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from metric_providers.base import BaseMetricProvider

class PsuEnergyAcMcpMachineProvider(BaseMetricProvider):
def __init__(self, resolutions, skip_check=False):
def __init__(self, resolution, skip_check=False):
super().__init__(
metric_name='psu_energy_ac_mcp_machine',
metrics={'time': int, 'value': int},
Expand Down
2 changes: 1 addition & 1 deletion metric_providers/psu/energy/ac/xgboost/machine/model
Submodule model updated from 16f45e to e42aff
121 changes: 97 additions & 24 deletions runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def __init__(self,
name, uri, uri_type, filename='usage_scenario.yml', branch=None,
debug_mode=False, allow_unsafe=False, no_file_cleanup=False, skip_system_checks=False,
skip_unsafe=False, verbose_provider_boot=False, full_docker_prune=False,
dry_run=False, dev_repeat_run=False, docker_prune=False, job_id=None):
dev_no_sleeps=False, dev_no_build=False, dev_no_metrics=False, docker_prune=False, job_id=None):

if skip_unsafe is True and allow_unsafe is True:
raise RuntimeError('Cannot specify both --skip-unsafe and --allow-unsafe')
Expand All @@ -104,8 +104,9 @@ def __init__(self,
self._verbose_provider_boot = verbose_provider_boot
self._full_docker_prune = full_docker_prune
self._docker_prune = docker_prune
self._dry_run = dry_run
self._dev_repeat_run = dev_repeat_run
self._dev_no_sleeps = dev_no_sleeps
self._dev_no_build = dev_no_build
self._dev_no_metrics = dev_no_metrics
self._uri = uri
self._uri_type = uri_type
self._original_filename = filename
Expand Down Expand Up @@ -145,7 +146,7 @@ def __init__(self,
# self.__filename = self._original_filename # this can be changed later if working directory changes

def custom_sleep(self, sleep_time):
if not self._dry_run:
if not self._dev_no_sleeps:
print(TerminalColors.HEADER, '\nSleeping for : ', sleep_time, TerminalColors.ENDC)
time.sleep(sleep_time)

Expand Down Expand Up @@ -387,13 +388,13 @@ def check_running_containers(self):
def populate_image_names(self):
for service_name, service in self._usage_scenario.get('services', {}).items():
if not service.get('image', None): # image is a non-mandatory field. But we need it, so we tmp it
if self._dev_repeat_run:
if self._dev_no_build:
service['image'] = f"{service_name}"
else:
service['image'] = f"{service_name}_{random.randint(500000,10000000)}"

def remove_docker_images(self):
if self._dev_repeat_run:
if self._dev_no_build:
return

print(TerminalColors.HEADER, '\nRemoving all temporary GMT images', TerminalColors.ENDC)
Expand Down Expand Up @@ -477,6 +478,10 @@ def update_and_insert_specs(self):
)

def import_metric_providers(self):
if self._dev_no_metrics:
print(TerminalColors.HEADER, '\nSkipping import of metric providers', TerminalColors.ENDC)
return

config = GlobalConfig().config

print(TerminalColors.HEADER, '\nImporting metric providers', TerminalColors.ENDC)
Expand Down Expand Up @@ -520,6 +525,10 @@ def import_metric_providers(self):
self.__metric_providers.sort(key=lambda item: 'rapl' not in item.__class__.__name__.lower())

def download_dependencies(self):
if self._dev_no_build:
print(TerminalColors.HEADER, '\nSkipping downloading dependencies', TerminalColors.ENDC)
return

print(TerminalColors.HEADER, '\nDownloading dependencies', TerminalColors.ENDC)
subprocess.run(['docker', 'pull', 'gcr.io/kaniko-project/executor:latest'], check=True)

Expand Down Expand Up @@ -567,6 +576,7 @@ def build_docker_images(self):
encoding='UTF-8',
check=True)
# The image exists so exit and don't build
print(f"Image {service['image']} exists in build cache. Skipping build ...")
continue
except subprocess.CalledProcessError:
pass
Expand Down Expand Up @@ -656,10 +666,11 @@ def order_service_names(service_name, visited=None):
raise RuntimeError(f"Cycle found in depends_on definition with service '{service_name}'!")
visited.add(service_name)

if service_name not in services:
raise RuntimeError(f"Dependent service '{service_name}' defined in 'depends_on' does not exist in usage_scenario!")

service = services[service_name]
if 'depends_on' in service:
if isinstance(service['depends_on'], dict):
raise RuntimeError(f"Service definition of {service_name} uses the long form of 'depends_on', however, GMT only supports the short form!")
for dep in service['depends_on']:
if dep not in names_ordered:
order_service_names(dep, visited)
Expand Down Expand Up @@ -834,6 +845,35 @@ def setup_services(self):
if 'pause-after-phase' in service:
self.__services_to_pause_phase[service['pause-after-phase']] = self.__services_to_pause_phase.get(service['pause-after-phase'], []) + [container_name]

if 'healthcheck' in service: # must come last
if 'disable' in service['healthcheck'] and service['healthcheck']['disable'] is True:
docker_run_string.append('--no-healthcheck')
else:
if 'test' in service['healthcheck']:
docker_run_string.append('--health-cmd')
health_string = service['healthcheck']['test']
if isinstance(service['healthcheck']['test'], list):
health_string_copy = service['healthcheck']['test'].copy()
health_string_command = health_string_copy.pop(0)
if health_string_command not in ['CMD', 'CMD-SHELL']:
raise RuntimeError(f"Healthcheck starts with {health_string_command}. Please use 'CMD' or 'CMD-SHELL' when supplying as list. For disabling do not use 'NONE' but the disable argument.")
health_string = ' '.join(health_string_copy)
docker_run_string.append(health_string)
if 'interval' in service['healthcheck']:
docker_run_string.append('--health-interval')
docker_run_string.append(service['healthcheck']['interval'])
if 'timeout' in service['healthcheck']:
docker_run_string.append('--health-timeout')
docker_run_string.append(service['healthcheck']['timeout'])
if 'retries' in service['healthcheck']:
docker_run_string.append('--health-retries')
docker_run_string.append(service['healthcheck']['retries'])
if 'start_period' in service['healthcheck']:
docker_run_string.append('--health-start-period')
docker_run_string.append(service['healthcheck']['start_period'])
if 'start_interval' in service['healthcheck']:
raise RuntimeError('start_interval is not supported atm in healthcheck')

docker_run_string.append(self.clean_image_name(service['image']))

# Before starting the container, check if the dependent containers are "ready".
Expand All @@ -842,29 +882,55 @@ def setup_services(self):
# In the future we want to implement an health check to know if dependent containers are actually ready.
if 'depends_on' in service:
for dependent_container in service['depends_on']:
print(f"Waiting for dependent container {dependent_container}")
time_waited = 0
state = ""
state = ''
health = 'healthy' # default because some containers have no health
max_waiting_time = config['measurement']['boot']['wait_time_dependencies']
while time_waited < max_waiting_time:
# TODO: Check health status instead if `healthcheck` is enabled (https://github.com/green-coding-berlin/green-metrics-tool/issues/423)
# This waiting loop is actually a pre-work for the upcoming health check. For the check if the container is "running", as implemented here, the waiting loop is not needed.
status_output = subprocess.check_output(
["docker", "container", "inspect", "-f", "{{.State.Status}}", dependent_container],
stderr=subprocess.STDOUT,
encoding='utf-8'
encoding='UTF-8',
)

state = status_output.strip()

if state == "running":
print(f"State of container '{dependent_container}': {state}")

if isinstance(service['depends_on'], dict) \
and 'condition' in service['depends_on'][dependent_container]:

condition = service['depends_on'][dependent_container]['condition']
if condition == 'service_healthy':
ps = subprocess.run(
["docker", "container", "inspect", "-f", "{{.State.Health.Status}}", dependent_container],
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, # put both in one stream
encoding='UTF-8'
)
health = ps.stdout.strip()
if ps.returncode != 0 or health == '<nil>':
raise RuntimeError(f"Health check for dependent_container '{dependent_container}' was requested, but container has no healthcheck implemented! (Output was: {health})")
if health == 'unhealthy':
raise RuntimeError('ontainer healthcheck failed terminally with status "unhealthy")')
print(f"Health of container '{dependent_container}': {health}")
elif condition == 'service_started':
pass
else:
raise RuntimeError(f"Unsupported condition in healthcheck for service '{service_name}': {condition}")

if state == 'running' and health == 'healthy':
break

print(f"State of container '{dependent_container}': {state}. Waiting for 1 second")
self.custom_sleep(1)
print('Waiting for 1 second')
time.sleep(1)
time_waited += 1

if state != "running":
raise RuntimeError(f"Dependent container '{dependent_container}' of '{container_name}' is not running after waiting for {time_waited} sec! Consider checking your service configuration, the entrypoint of the container or the logs of the container.")
if state != 'running':
raise RuntimeError(f"Dependent container '{dependent_container}' of '{container_name}' is not running but {state} after waiting for {time_waited} sec! Consider checking your service configuration, the entrypoint of the container or the logs of the container.")
if health != 'healthy':
raise RuntimeError(f"Dependent container '{dependent_container}' of '{container_name}' is not healthy but '{health}' after waiting for {time_waited} sec! Consider checking your service configuration, the entrypoint of the container or the logs of the container.")

if 'command' in service: # must come last
for cmd in service['command'].split():
Expand Down Expand Up @@ -946,6 +1012,9 @@ def add_to_log(self, container_name, message, cmd=''):


def start_metric_providers(self, allow_container=True, allow_other=True):
if self._dev_no_metrics:
return

print(TerminalColors.HEADER, '\nStarting metric providers', TerminalColors.ENDC)

for metric_provider in self.__metric_providers:
Expand Down Expand Up @@ -1099,6 +1168,9 @@ def run_flows(self):

# this function should never be called twice to avoid double logging of metrics
def stop_metric_providers(self):
if self._dev_no_metrics:
return

print(TerminalColors.HEADER, 'Stopping metric providers and parsing measurements', TerminalColors.ENDC)
errors = []
for metric_provider in self.__metric_providers:
Expand Down Expand Up @@ -1454,8 +1526,9 @@ def run(self):
parser.add_argument('--verbose-provider-boot', action='store_true', help='Boot metric providers gradually')
parser.add_argument('--full-docker-prune', action='store_true', help='Stop and remove all containers, build caches, volumes and images on the system')
parser.add_argument('--docker-prune', action='store_true', help='Prune all unassociated build caches, networks volumes and stopped containers on the system')
parser.add_argument('--dry-run', action='store_true', help='Removes all sleeps. Resulting measurement data will be skewed.')
parser.add_argument('--dev-repeat-run', action='store_true', help='Checks if a docker image is already in the local cache and will then not build it. Also doesn\'t clear the images after a run')
parser.add_argument('--dev-no-metrics', action='store_true', help='Skips loading the metric providers. Runs will be faster, but you will have no metric')
parser.add_argument('--dev-no-sleeps', action='store_true', help='Removes all sleeps. Resulting measurement data will be skewed.')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this could also take in a sleep value so you can set it to 1 second and such mitigate some of the errors. It defaults to 0

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats a nice idea, but the sleeps quickly balloon up even if you only set a 1 to this.

Having no-sleeps and using the healthcheck is actually a breeze and I am fixing the Nextcloud containers atm and it works very well.

Let's reconsider this once we run into issues with the 0-sleep implementation

parser.add_argument('--dev-no-build', action='store_true', help='Checks if a container images are already in the local cache and will then not build it. Also doesn\'t clear the images after a run. Please note that skipping builds only works the second time you make a run.')
parser.add_argument('--print-logs', action='store_true', help='Prints the container and process logs to stdout')

args = parser.parse_args()
Expand All @@ -1470,9 +1543,9 @@ def run(self):
error_helpers.log_error('--allow-unsafe and skip--unsafe in conjuction is not possible')
sys.exit(1)

if args.dev_repeat_run and (args.docker_prune or args.full_docker_prune):
if args.dev_no_build and (args.docker_prune or args.full_docker_prune):
parser.print_help()
error_helpers.log_error('--dev-repeat-run blocks pruning docker images. Combination is not allowed')
error_helpers.log_error('--dev-no-build blocks pruning docker images. Combination is not allowed')
sys.exit(1)

if args.full_docker_prune and GlobalConfig().config['postgresql']['host'] == 'green-coding-postgres-container':
Expand Down Expand Up @@ -1515,8 +1588,8 @@ def run(self):
branch=args.branch, debug_mode=args.debug, allow_unsafe=args.allow_unsafe,
no_file_cleanup=args.no_file_cleanup, skip_system_checks=args.skip_system_checks,
skip_unsafe=args.skip_unsafe,verbose_provider_boot=args.verbose_provider_boot,
full_docker_prune=args.full_docker_prune, dry_run=args.dry_run,
dev_repeat_run=args.dev_repeat_run, docker_prune=args.docker_prune)
full_docker_prune=args.full_docker_prune, dev_no_sleeps=args.dev_no_sleeps,
dev_no_build=args.dev_no_build, dev_no_metrics=args.dev_no_metrics, docker_prune=args.docker_prune)

# Using a very broad exception makes sense in this case as we have excepted all the specific ones before
#pylint: disable=broad-except
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
name: Test depends_on
author: Arne Tarara <arne@green-coding.berlin>
description: test

services:
test-container-1:
image: alpine
depends_on:
test-container-2:
condition: service_completed_successfully
test-container-2:
image: alpine

flow:
- name: dummy
container: test-container-1
commands:
- type: console
command: pwd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
name: Test depends_on
name: Test depends_on long_form
author: David Kopp
description: test

Expand Down
23 changes: 23 additions & 0 deletions tests/data/usage_scenarios/healthcheck.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
name: Test depends_on
author: David Kopp
description: test

services:
test-container-1:
image: alpine
depends_on:
test-container-2:
condition: service_healthy
test-container-2:
image: alpine
healthcheck:
test: ls
interval: 1s

flow:
- name: dummy
container: test-container-1
commands:
- type: console
command: pwd
20 changes: 20 additions & 0 deletions tests/data/usage_scenarios/healthcheck_error_missing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
name: Test depends_on
author: David Kopp
description: test

services:
test-container-1:
image: alpine
depends_on:
test-container-2:
condition: service_healthy
test-container-2:
image: alpine

flow:
- name: dummy
container: test-container-1
commands:
- type: console
command: pwd
5 changes: 2 additions & 3 deletions tests/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,11 @@ def setup_module(module):
err = io.StringIO()
GlobalConfig(config_name='test-config.yml').config
with redirect_stdout(out), redirect_stderr(err):
uri = os.path.abspath(os.path.join(
CURRENT_DIR, 'stress-application/'))
uri = os.path.abspath(os.path.join(CURRENT_DIR, 'stress-application/'))
subprocess.run(['docker', 'compose', '-f', uri+'/compose.yml', 'build'], check=True)

# Run the application
runner = Runner(name=RUN_NAME, uri=uri, uri_type='folder', dev_repeat_run=True, skip_system_checks=False)
runner = Runner(name=RUN_NAME, uri=uri, uri_type='folder', dev_no_build=True, dev_no_sleeps=True, dev_no_metrics=False, skip_system_checks=False)
runner.run()

#pylint: disable=global-statement
Expand Down
Loading