- Notifications
You must be signed in to change notification settings - Fork 290
Open
Description
When using the process.sh script, I can process my test and validation datasets normally, but processing my training dataset fails without any error output. I then added a batch-size option to extract.py and changed the directory processing to run in batches instead of handling all directories at once. After these modifications, I was able to get the expected output for the training data. The modified Python file is shown below; it adds the parameter "batch_size" and updates the ExtractFeaturesForDirsList function:
#!/usr/bin/python
import itertools
import multiprocessing
import os
import sys
import shutil
import subprocess
from threading import Timer
import sys
from argparse import ArgumentParser
from subprocess import Popen, PIPE, STDOUT, call

# ......
# NOTE(review): the original post elides part of the file here. The names
# ParallelExtractDir and get_immediate_subdirectories used below are not
# defined in this excerpt; presumably they live in the elided part.


def ExtractFeaturesForDirsList(args, dirs):
    """Extract features for ``dirs`` in batches and print the results.

    The directories are processed ``args.batch_size`` at a time. For each
    batch, ``ParallelExtractDir(args, tmp_dir, directory)`` is run for every
    directory of the batch on a pool of 4 worker processes. Afterwards every
    file found in the temporary directory is written to stdout with ``cat``
    and then deleted, so the next batch starts from an empty directory.

    Args:
        args: Parsed command-line arguments. Only ``batch_size`` is read
            here; the whole object is forwarded to ``ParallelExtractDir``.
        dirs: Sequence of directory paths to process. An empty sequence
            produces no output.

    Raises:
        ValueError: If ``args.batch_size`` is smaller than 1.
    """
    batch_size = args.batch_size
    # A non-positive batch size would either make range() fail (0) or make
    # the loop silently do nothing (negative); reject it before touching disk.
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got %r" % (batch_size,))

    # The PID keeps concurrently running extractors from sharing a directory.
    tmp_dir = f"./tmp/feature_extractor{os.getpid()}/"
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir, ignore_errors=True)
    os.makedirs(tmp_dir)
    try:
        # Walk the directory list in slices of batch_size.
        for start in range(0, len(dirs), batch_size):
            batch_dirs = dirs[start:start + batch_size]
            # The context manager shuts the pool down after every batch, so
            # worker processes do not pile up across batches.
            with multiprocessing.Pool(4) as pool:
                pool.starmap(
                    ParallelExtractDir,
                    zip(itertools.repeat(args), itertools.repeat(tmp_dir), batch_dirs),
                )
            for name in os.listdir(tmp_dir):
                path = os.path.join(tmp_dir, name)
                # Argument-list form: no shell is involved, so unusual file
                # names cannot break or alter the command.
                call(["cat", path])
                # Delete the processed file to prepare for the next batch.
                os.remove(path)
    finally:
        # Always clean up the temporary directory, even after a failure.
        shutil.rmtree(tmp_dir, ignore_errors=True)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument("-maxlen", "--max_path_length", dest="max_path_length", required=False, default=8)
    parser.add_argument("-maxwidth", "--max_path_width", dest="max_path_width", required=False, default=2)
    parser.add_argument("-threads", "--num_threads", dest="num_threads", required=False, default=64)
    parser.add_argument("-j", "--jar", dest="jar", required=True)
    parser.add_argument("-dir", "--dir", dest="dir", required=False)
    parser.add_argument("-file", "--file", dest="file", required=False)
    # Number of directories handed to the worker pool per batch.
    parser.add_argument("-batch_size", "--batch_size", dest="batch_size", required=False, default=5, type=int)
    args = parser.parse_args()

    if args.file is not None:
        # Single-file mode: run the Java extractor directly on the file.
        command = 'java -cp ' + args.jar + ' JavaExtractor.App --max_path_length ' + \
                  str(args.max_path_length) + ' --max_path_width ' + str(args.max_path_width) + ' --file ' + args.file
        os.system(command)
    elif args.dir is not None:
        subdirs = get_immediate_subdirectories(args.dir)
        to_extract = subdirs
        # With no subdirectories, process the given directory itself.
        if len(subdirs) == 0:
            to_extract = [args.dir.rstrip('/')]
        ExtractFeaturesForDirsList(args, to_extract)
Metadata
Assignees
Labels
No labels