-
Notifications
You must be signed in to change notification settings - Fork 0
/
mp4-to-text.py
95 lines (76 loc) · 3.11 KB
/
mp4-to-text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import whisper
from moviepy.editor import VideoFileClip
from tqdm import tqdm
def _extract_audio(video_path, audio_path):
    """Write the audio track of ``video_path`` to ``audio_path``, always closing the clip."""
    video = VideoFileClip(video_path)
    try:
        audio = video.audio
        if audio is None:
            # MoviePy exposes ``audio`` as None when the file has no audio stream.
            raise ValueError("video has no audio track")
        audio.write_audiofile(audio_path, verbose=False, logger=None)
    finally:
        # Release the underlying reader even when extraction fails.
        video.close()


def convert_mp4_to_text(video_path, output_dir, model):
    """
    Convert audio from an MP4 file to text using Whisper and save it.

    The audio track is written to a temporary ``<base name>_temp.wav`` file
    inside ``output_dir``, passed to ``model.transcribe``, and the ``"text"``
    entry of the result is saved as ``<base name>.txt`` in ``output_dir``.
    The temporary audio file is removed whether or not the conversion
    succeeds. Any failure is printed instead of raised, so a batch caller
    can carry on with the next file.

    Args:
        video_path (str): Path to the MP4 file
        output_dir (str): Directory to save the text file (created if missing)
        model: Loaded Whisper model instance

    Returns:
        None
    """
    try:
        # Extract filename without extension
        base_name = os.path.splitext(os.path.basename(video_path))[0]
        # An empty output_dir means "current directory"; makedirs("") would fail.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        temp_audio = os.path.join(output_dir, f"{base_name}_temp.wav")
        try:
            # Extract audio from video
            print(f"Extracting audio from {video_path}...")
            _extract_audio(video_path, temp_audio)

            # Convert audio to text using Whisper
            print("Converting audio to text...")
            result = model.transcribe(temp_audio)
            text = result["text"]

            # Save text to file
            text_file_path = os.path.join(output_dir, f"{base_name}.txt")
            with open(text_file_path, 'w', encoding='utf-8') as f:
                f.write(text)
        finally:
            # Clean up the temporary audio file on success and on failure alike.
            if os.path.exists(temp_audio):
                os.remove(temp_audio)
        print(f"Successfully created {text_file_path}")
    except Exception as e:
        # Deliberately broad: one bad video must not abort a whole batch run.
        print(f"Error processing {video_path}: {e}")
def process_directory(input_dir, output_dir, model_size="base"):
    """
    Recursively process all MP4 files in a directory and its subdirectories.

    Files are discovered first; the Whisper model is only loaded when there
    is at least one MP4 file to transcribe. Files are processed in sorted
    path order so repeated runs behave the same way.

    NOTE: every file is passed the same ``output_dir``, so the input
    directory structure is not mirrored in the output.

    Args:
        input_dir (str): Input directory to search for MP4 files
        output_dir (str): Output directory for text files
        model_size (str): Whisper model size ('tiny', 'base', 'small', 'medium', 'large')

    Returns:
        None
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # os.walk yields nothing for a missing directory, so report it explicitly.
    if not os.path.isdir(input_dir):
        print(f"Input directory not found: {input_dir}")
        return

    # Find all MP4 files (case-insensitive extension match)
    mp4_files = sorted(
        os.path.join(root, file)
        for root, _, files in os.walk(input_dir)
        for file in files
        if file.lower().endswith('.mp4')
    )
    print(f"Found {len(mp4_files)} MP4 files")
    if not mp4_files:
        # Nothing to do: skip the model load entirely.
        return

    # Load Whisper model
    print(f"Loading Whisper {model_size} model...")
    model = whisper.load_model(model_size)

    # Process each MP4 file with progress bar
    for mp4_file in tqdm(mp4_files, desc="Processing videos"):
        convert_mp4_to_text(mp4_file, output_dir, model)
def main():
    """
    Run the batch transcription using the settings configured below.
    """
    # Whisper model to load. Larger models trade speed for accuracy:
    #   'tiny'   - fastest, least accurate
    #   'base'   - good balance of speed and accuracy
    #   'small'  - more accurate than base, but slower
    #   'medium' - even more accurate, but slower
    #   'large'  - most accurate, but slowest and requires more RAM
    model_size = "base"

    # Edit these two paths to point at your videos and transcript folder
    videos_dir = "path/to/your/videos"
    texts_dir = "path/to/output/texts"

    process_directory(videos_dir, texts_dir, model_size)
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()