speechlib/main.py

43 lines
1.6 KiB
Python

from speechlib import Transcriptor
from configparser import ConfigParser
import os
def str2bool(v: str) -> bool:
    """Interpret a textual flag as a boolean.

    Args:
        v: Text to interpret, e.g. a raw value read from a config file.

    Returns:
        True when ``v`` (ignoring case and surrounding whitespace) is one
        of "yes", "true", "t" or "1"; False for any other text, including
        the empty string.
    """
    # Strip first so values such as " true\n" are still recognised.
    return v.strip().lower() in ("yes", "true", "t", "1")
### Read settings from the [FILE] section of config.ini ###
# config.ini is looked up relative to the current working directory.
config = ConfigParser()
config.read('config.ini')
audiofolder = config.get('FILE', 'audiofolder')
access_token = config.get('FILE', 'accesstoken')
voicefolder = config.get('FILE', 'voicefolder')
language = "id"  # language code handed to Transcriptor
quantization = str2bool(config.get('FILE', 'quantization'))
modelSize = "medium"  # model size handed to Transcriptor

### Resolve the audio and output folders under the working directory ###
current_dir = os.getcwd()
# Join the parts instead of concatenating raw strings: leading separators
# on the configured folder are dropped so both "audio" and "/audio" resolve
# to <cwd>/audio (plain concatenation only worked for the latter).
audio_dir = os.path.join(current_dir, audiofolder.lstrip("/\\"))
output_dir = os.path.join(current_dir, "output")

os.makedirs(audio_dir, exist_ok=True)
print(f"Current directory: {current_dir}")
print(f"Audio directory: {audio_dir}")
print(f"Output directory: {output_dir}")
os.makedirs(output_dir, exist_ok=True)

### Loop for each audio file in the audio folder ###
for filename in os.listdir(audio_dir):
    # Only .mp3 / .wav files are processed; the match is case-insensitive.
    if not filename.lower().endswith((".mp3", ".wav")):
        continue

    audiofile = os.path.join(audio_dir, filename)
    print(f"Audio file: {audiofile}")

    # splitext removes only the final extension and is case-agnostic, so
    # "A.MP3" maps to "A.txt" and a ".wav"/".mp3" substring elsewhere in
    # the name or in a parent directory no longer truncates the result.
    filepath = os.path.join(output_dir, os.path.splitext(filename)[0] + ".txt")
    print(f"Output file: {filepath}")

    # Create (or truncate) the output file before transcribing, as the
    # script always did, but release the handle immediately instead of
    # leaking it and rebinding the loop variable to a file object.
    # NOTE(review): nothing visible here writes `res` into this file;
    # confirm whether the transcript is meant to be saved to `filepath`.
    with open(filepath, "w"):
        pass

    ### transcribe ###
    transcriptor = Transcriptor(audiofile, output_dir, language, modelSize, access_token, voicefolder, quantization)
    res = transcriptor.faster_whisper()