ru - DeepPavlov/rubert-base-cased-conversational es - dccuchile/bert-base-spanish-wwm-cased it - dbmdz/bert-base-italian-xxl-uncased tr - dbmdz/bert-base-turkish-cased pt - neuralmind/bert-large-portuguese-cased (didn't help, not included in our blend) fr - camembert/camembert-large https://github.com/makcedward/nlpaug https://github.com/ufoym/imbalanced-dataset-sampler from scipy.stats import rankdata predict_list = [] predict_list.append(pd.read_csv("../input/textcnn-2d-convolution-on-preprocessed-data/submission.csv")[LABELS].values) predict_list.append(pd.read_csv("../input/lr-with-words-and-char-n-grams-preprocessed-data/submission.csv")[LABELS].values) predict_list.append(pd.read_csv("../input/pooled-gru-fasttext-on-preprocessed-data/submission.csv")[LABELS].values) predict_list.append(pd.read_csv("../input/cnn-3-out-of-fold-4-epochs-preprocessed-data/submit_cnn_avg_3_folds.csv")[LABELS].values) predict_list.append(pd.read_csv("../input/global-average-pool-on-preprocessed/submission.csv")[LABELS].values) predict_list.append(pd.read_csv("../input/lemmatization-pooled-gru-on-preprocessed-dataset/submission.csv")[LABELS].values) predict_list.append(pd.read_csv("../input/bilstm-on-preprocessed-data/submission.csv")[LABELS].values) predict_list.append(pd.read_csv("../input/wordbatch-fm-ftrl-on-preprocessed-data/lvl0_wordbatch_clean_sub.csv")[LABELS].values) print("Rank averaging on ", len(predict_list), " files") predictions = np.zeros_like(predict_list[0]) for predict in predict_list: for i in range(6): predictions[:, i] = np.add(predictions[:, i], rankdata(predict[:, i])/predictions.shape[0]) predictions /= len(predict_list)