11#!/usr/bin/env python3
22
33import argparse
4+ import functools
45import pathlib
56import re
67import statistics
def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None):
    """
    Create a tabulated comparison of the baseline and the candidate for the given metric.

    In text mode exactly two data sets are compared, so `data` is expected to contain a
    'benchmark' column plus the columns `<metric>_0` (baseline) and `<metric>_1` (candidate).
    `baseline_name` and `candidate_name` are used as the headers of those two columns.

    The dataframe passed by the caller is not modified: the derived 'difference' and
    'percent' columns are added to a copy.

    Returns the table produced by `tabulate.tabulate`.
    """
    baseline = data[f'{metric}_0']
    candidate = data[f'{metric}_1']
    difference = candidate - baseline

    # Use `assign` rather than item assignment: `data` may be a filtered selection of
    # another dataframe, and writing new columns into it in place would both mutate the
    # caller's object and risk pandas' SettingWithCopyWarning.
    data = data.assign(difference=difference, percent=100 * (difference / baseline))

    data = data.replace(numpy.nan, None).sort_values(by='benchmark')  # avoid NaNs in tabulate output
    headers = ['Benchmark', baseline_name, candidate_name, 'Difference', '% Difference']
    fmt = (None, '.2f', '.2f', '.2f', '.2f')
    table = data[['benchmark', f'{metric}_0', f'{metric}_1', 'difference', 'percent']].set_index('benchmark')
    return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right')
7076
def create_chart(data, metric, subtitle=None, series_names=None):
    """
    Create a bar chart comparing the given metric across the provided series.

    `data` is expected to contain a 'benchmark' column and one column per series named
    `<metric>_<i>`, where `i` is the position of the series. `series_names[i]` is the
    display name given to series `i`; the chart title is the names joined with ' vs '.
    When `series_names` is None, the `<metric>_<i>` columns found in `data` are plotted
    under their raw column names, ordered by `i`.

    `subtitle` is forwarded unchanged to `plotly.express.bar`.

    Returns the figure created by `plotly.express.bar`.
    """
    data = data.sort_values(by='benchmark')

    if series_names is None:
        # No display names were provided: discover the per-series columns and keep their
        # raw names instead of failing on `len(None)`.
        prefix = f'{metric}_'
        indices = sorted(int(column[len(prefix):]) for column in data.columns
                         if isinstance(column, str)
                         and column.startswith(prefix)
                         and column[len(prefix):].isdigit())
        series_names = [f'{prefix}{i}' for i in indices]
    else:
        series_names = list(series_names)
        data = data.rename(columns={f'{metric}_{i}': name for (i, name) in enumerate(series_names)})

    title = ' vs '.join(series_names)
    figure = plotly.express.bar(data, title=title, subtitle=subtitle, x='benchmark', y=series_names, barmode='group')
    # The axis and legend titles add nothing beyond the chart title, so blank them out.
    figure.update_layout(xaxis_title='', yaxis_title='', legend_title='')
    return figure
8486
8587def main (argv ):
8688 parser = argparse .ArgumentParser (
8789 prog = 'compare-benchmarks' ,
88- description = 'Compare the results of two sets of benchmarks in LNT format.' ,
90+ description = 'Compare the results of multiple sets of benchmarks in LNT format.' ,
8991 epilog = 'This script depends on the modules listed in `libcxx/utils/requirements.txt`.' )
90- parser .add_argument ('baseline' , type = argparse .FileType ('r' ),
91- help = 'Path to a LNT format file containing the benchmark results for the baseline.' )
92- parser .add_argument ('candidate' , type = argparse .FileType ('r' ),
93- help = 'Path to a LNT format file containing the benchmark results for the candidate.' )
92+ parser .add_argument ('files' , type = argparse .FileType ('r' ), nargs = '+' ,
93+ help = 'Path to LNT format files containing the benchmark results to compare. In the text format, '
94+ 'exactly two files must be compared.' )
9495 parser .add_argument ('--output' , '-o' , type = pathlib .Path , required = False ,
9596 help = 'Path of a file where to output the resulting comparison. If the output format is `text`, '
9697 'default to stdout. If the output format is `chart`, default to a temporary file which is '
@@ -107,43 +108,52 @@ def main(argv):
107108 parser .add_argument ('--open' , action = 'store_true' ,
108109 help = 'Whether to automatically open the generated HTML file when finished. This option only makes sense '
109110 'when the output format is `chart`.' )
110- parser .add_argument ('--baseline-name' , type = str , default = 'Baseline' ,
111- help = 'Optional name to use for the "baseline" label.' )
112- parser .add_argument ('--candidate-name' , type = str , default = 'Candidate' ,
113- help = 'Optional name to use for the "candidate" label.' )
111+ parser .add_argument ('--series-names' , type = str , required = False ,
112+ help = 'Optional comma-delimited list of names to use for the various series. By default, we use '
113+ 'Baseline and Candidate for two input files, and CandidateN for subsequent inputs.' )
114114 parser .add_argument ('--subtitle' , type = str , required = False ,
115115 help = 'Optional subtitle to use for the chart. This can be used to help identify the contents of the chart. '
116116 'This option cannot be used with the plain text output.' )
117117 args = parser .parse_args (argv )
118118
119- if args .format == 'text' and args .subtitle is not None :
120- parser .error ('Passing --subtitle makes no sense with --format=text' )
121-
122- if args .format == 'text' and args .open :
123- parser .error ('Passing --open makes no sense with --format=text' )
119+ if args .format == 'text' :
120+ if len (args .files ) != 2 :
121+ parser .error ('--format=text requires exactly two input files to compare' )
122+ if args .subtitle is not None :
123+ parser .error ('Passing --subtitle makes no sense with --format=text' )
124+ if args .open :
125+ parser .error ('Passing --open makes no sense with --format=text' )
126+
127+ if args .series_names is None :
128+ args .series_names = ['Baseline' ]
129+ if len (args .files ) == 2 :
130+ args .series_names += ['Candidate' ]
131+ elif len (args .files ) > 2 :
132+ args .series_names .extend (f'Candidate{ n } ' for n in range (1 , len (args .files )))
133+ else :
134+ args .series_names = args .series_names .split (',' )
135+ if len (args .series_names ) != len (args .files ):
136+ parser .error (f'Passed incorrect number of series names: got { len (args .series_names )} series names but { len (args .files )} inputs to compare' )
124137
125- baseline = pandas .DataFrame (parse_lnt (args .baseline .readlines ()))
126- candidate = pandas .DataFrame (parse_lnt (args .candidate .readlines ()))
138+ # Parse the raw LNT data and store each input in a dataframe
139+ lnt_inputs = [parse_lnt (file .readlines ()) for file in args .files ]
140+ inputs = [pandas .DataFrame (lnt ).rename (columns = {args .metric : f'{ args .metric } _{ i } ' }) for (i , lnt ) in enumerate (lnt_inputs )]
127141
128- # Join the baseline and the candidate into a single dataframe and add some new columns
129- data = baseline .merge (candidate , how = 'outer' , on = 'benchmark' , suffixes = ('_baseline' , '_candidate' ))
130- data ['difference' ] = data [f'{ args .metric } _candidate' ] - data [f'{ args .metric } _baseline' ]
131- data ['percent' ] = 100 * (data ['difference' ] / data [f'{ args .metric } _baseline' ])
142+ # Join the inputs into a single dataframe
143+ data = functools .reduce (lambda a , b : a .merge (b , how = 'outer' , on = 'benchmark' ), inputs )
132144
133145 if args .filter is not None :
134146 keeplist = [b for b in data ['benchmark' ] if re .search (args .filter , b ) is not None ]
135147 data = data [data ['benchmark' ].isin (keeplist )]
136148
137149 if args .format == 'chart' :
138- figure = create_chart (data , args .metric , subtitle = args .subtitle ,
139- baseline_name = args .baseline_name ,
140- candidate_name = args .candidate_name )
150+ figure = create_chart (data , args .metric , subtitle = args .subtitle , series_names = args .series_names )
141151 do_open = args .output is None or args .open
142152 output = args .output or tempfile .NamedTemporaryFile (suffix = '.html' ).name
143153 plotly .io .write_html (figure , file = output , auto_open = do_open )
144154 else :
145- diff = plain_text_comparison (data , args .metric , baseline_name = args .baseline_name ,
146- candidate_name = args .candidate_name )
155+ diff = plain_text_comparison (data , args .metric , baseline_name = args .series_names [ 0 ] ,
156+ candidate_name = args .series_names [ 1 ] )
147157 diff += '\n '
148158 if args .output is not None :
149159 with open (args .output , 'w' ) as out :
0 commit comments