Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ select = ["D", "E", "F", "I"]
ignore = ["D212"]
line-length = 100
target-version = "py38"
fix = true
23 changes: 13 additions & 10 deletions skll/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import numpy as np
import ruamel.yaml as yaml
from ruamel.yaml import YAML

from skll.data.readers import safe_float
from skll.types import ClassMap, FoldMapping, LabelType, PathOrStr
Expand Down Expand Up @@ -610,7 +610,9 @@ def parse_config_file(
raise ValueError(
"Configuration file does not contain list of learners " "in [Input] section."
)
learners = yaml.safe_load(fix_json(learners_string))

yaml = YAML(typ="safe", pure=True)
learners = yaml.load(fix_json(learners_string))

if len(learners) == 0:
raise ValueError(
Expand All @@ -630,7 +632,7 @@ def parse_config_file(
custom_metric_path = locate_file(config.get("Input", "custom_metric_path"), config_dir)

# get the featuresets
featuresets = yaml.safe_load(config.get("Input", "featuresets"))
featuresets = yaml.load(config.get("Input", "featuresets"))

# ensure that featuresets is either a list of features or a list of lists
# of features
Expand All @@ -641,7 +643,7 @@ def parse_config_file(
f"specified: {featuresets}"
)

featureset_names = yaml.safe_load(fix_json(config.get("Input", "featureset_names")))
featureset_names = yaml.load(fix_json(config.get("Input", "featureset_names")))

# ensure that featureset_names is a list of strings, if specified
if featureset_names:
Expand All @@ -658,7 +660,7 @@ def parse_config_file(
# learners. If it's not specified, then we just assume
# that we are using 10 folds for each learner.
learning_curve_cv_folds_list_string = config.get("Input", "learning_curve_cv_folds_list")
learning_curve_cv_folds_list = yaml.safe_load(fix_json(learning_curve_cv_folds_list_string))
learning_curve_cv_folds_list = yaml.load(fix_json(learning_curve_cv_folds_list_string))
if len(learning_curve_cv_folds_list) == 0:
learning_curve_cv_folds_list = [10] * len(learners)
else:
Expand All @@ -679,7 +681,7 @@ def parse_config_file(
# floats (proportions). If it's not specified, then we just
# assume that we are using np.linspace(0.1, 1.0, 5).
learning_curve_train_sizes_string = config.get("Input", "learning_curve_train_sizes")
learning_curve_train_sizes = yaml.safe_load(fix_json(learning_curve_train_sizes_string))
learning_curve_train_sizes = yaml.load(fix_json(learning_curve_train_sizes_string))
if len(learning_curve_train_sizes) == 0:
learning_curve_train_sizes = np.linspace(0.1, 1.0, 5).tolist()
else:
Expand All @@ -698,9 +700,9 @@ def parse_config_file(
# do we need to shuffle the training data
do_shuffle = config.getboolean("Input", "shuffle")

fixed_parameter_list = yaml.safe_load(fix_json(config.get("Input", "fixed_parameters")))
fixed_sampler_parameters = yaml.safe_load(fix_json(config.get("Input", "sampler_parameters")))
param_grid_list = yaml.safe_load(fix_json(config.get("Tuning", "param_grids")))
fixed_parameter_list = yaml.load(fix_json(config.get("Input", "fixed_parameters")))
fixed_sampler_parameters = yaml.load(fix_json(config.get("Input", "sampler_parameters")))
param_grid_list = yaml.load(fix_json(config.get("Tuning", "param_grids")))

# read and normalize the value of `pos_label`
pos_label_string = safe_float(config.get("Tuning", "pos_label"))
Expand Down Expand Up @@ -804,7 +806,8 @@ def parse_config_file(

# Get class mapping dictionary if specified
class_map_string = config.get("Input", "class_map")
original_class_map = yaml.safe_load(fix_json(class_map_string))
yaml = YAML(typ="safe", pure=True)
original_class_map = yaml.load(fix_json(class_map_string))
if original_class_map:
# Change class_map to map from originals to replacements instead of
# from replacement to list of originals
Expand Down
5 changes: 3 additions & 2 deletions skll/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pathlib import Path
from typing import Iterable, List, Union

import ruamel.yaml as yaml
from ruamel.yaml import YAML

from skll.types import FoldMapping, PathOrStr

Expand Down Expand Up @@ -186,7 +186,8 @@ def _parse_and_validate_metrics(metrics: str, option_name: str, logger=None) ->

# make sure the given metrics data type is a list
# and parse it correctly
metrics = yaml.safe_load(fix_json(metrics))
yaml = YAML(typ="safe", pure=True)
metrics = yaml.load(fix_json(metrics))
if not isinstance(metrics, list):
raise TypeError(f"{option_name} should be a list, not a " f"{type(metrics)}.")

Expand Down
10 changes: 5 additions & 5 deletions skll/experiments/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ruamel.yaml as yaml
import seaborn as sns
from ruamel.yaml import YAML

from skll.types import FoldMapping, PathOrStr
from skll.utils.logging import get_skll_logger
Expand Down Expand Up @@ -638,6 +638,8 @@ def _write_summary_file(result_json_paths: List[str], output_file: IO[str], abla
# Map from feature set names to all features in them
all_features = defaultdict(set)
logger = get_skll_logger("experiment")
yaml = YAML(typ="safe", pure=True)

for json_path_str in result_json_paths:
json_path = Path(json_path_str)
if not json_path.exists():
Expand All @@ -654,7 +656,7 @@ def _write_summary_file(result_json_paths: List[str], output_file: IO[str], abla
featureset_name = obj[0]["featureset_name"]
if ablation != 0 and "_minus_" in featureset_name:
parent_set = featureset_name.split("_minus_", 1)[0]
all_features[parent_set].update(yaml.safe_load(obj[0]["featureset"]))
all_features[parent_set].update(yaml.load(obj[0]["featureset"]))
learner_result_dicts.extend(obj)

# Build and write header
Expand All @@ -670,9 +672,7 @@ def _write_summary_file(result_json_paths: List[str], output_file: IO[str], abla
featureset_name = lrd["featureset_name"]
if ablation != 0:
parent_set = featureset_name.split("_minus_", 1)[0]
ablated_features = all_features[parent_set].difference(
yaml.safe_load(lrd["featureset"])
)
ablated_features = all_features[parent_set].difference(yaml.load(lrd["featureset"]))
lrd["ablated_features"] = ""
if ablated_features:
lrd["ablated_features"] = json.dumps(sorted(ablated_features))
Expand Down