Posted on Nov 12
使用 faster-whisper 从多媒体语音材料中抽取出文本

""" 批量转录当前目录下的 .mp3 文件，使用 faster-whisper """ import os import sys from pathlib import Path from faster_whisper import WhisperModel # ================== 配置区 ================== MODEL_SIZE = "small" # 可选: tiny, base, small, medium, large DEVICE = "cpu" # cpu 或 cuda COMPUTE_TYPE = "int8" # int8, float16, float32 (CPU 推荐 int8) VAD_FILTER = True # 启用语音活动检测，去除静音 OUTPUT_FORMAT = "txt" # 只输出 .txt VERBOSE = True # 是否显示详细日志 # ===========================================  def transcribe_audio(audio_path: Path, model: WhisperModel) -> str: """转录单个音频文件，返回文本内容""" print(f"转录: {audio_path.name} → {audio_path.stem}.txt") segments, info = model.transcribe( str(audio_path), language=None, # 自动检测  beam_size=5, vad_filter=VAD_FILTER, vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False, ) text_lines = [] for segment in segments: line = segment.text.strip() text_lines.append(line) if VERBOSE: print(".", end="", flush=True) # print(f"[{segment.start:06.2f}s --> {segment.end:06.2f}s] {line}", flush=True)  return "\n".join(text_lines) def main(): print("=== faster-whisper 批量转录 ===") current_dir = Path(".") mp3_files = sorted(current_dir.glob("*.mp3")) if not mp3_files: print("未找到 .mp3 文件，退出。") return # 加载模型（只加载一次）  print(f"正在加载模型 {MODEL_SIZE} ({DEVICE}, {COMPUTE_TYPE})...") model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE) processed = 0 for mp3_path in mp3_files: txt_path = mp3_path.with_suffix(".txt") if txt_path.exists(): print(f"跳过: {txt_path.name} 已存在") continue try: text = transcribe_audio(mp3_path, model) txt_path.write_text(text, encoding="utf-8") processed += 1 except Exception as e: print(f"错误转录 {mp3_path.name}: {e}", file=sys.stderr) print(f"全部完成！共处理 {processed} 个文件。") if __name__ == "__main__": main()
DEV Community

使用 faster-whisper 从多媒体语音材料中抽取出文本

Top comments (0)