44 lines
1.4 KiB
Python
44 lines
1.4 KiB
Python
import os
|
|
from pydub import AudioSegment
|
|
from .transcribe import (transcribe)
|
|
|
|
# segment according to speaker
|
|
def wav_file_segmentation(file_name, segments, language, modelSize, model_type, quantization, custom_model_path, hf_model_path, aai_api_key):
    """Cut a WAV file into time segments and transcribe each segment.

    Every segment is exported as a temporary WAV clip inside the ``segments``
    directory (created in the current working directory if missing), handed to
    ``transcribe`` and deleted again afterwards.

    Args:
        file_name: Path of the WAV file to load.
        segments: Iterable of items whose first two elements are the start and
            end time of a segment, in seconds.
        language: Forwarded unchanged to ``transcribe``.
        modelSize: Forwarded unchanged to ``transcribe``.
        model_type: Forwarded unchanged to ``transcribe``.
        quantization: Forwarded unchanged to ``transcribe``.
        custom_model_path: Forwarded unchanged to ``transcribe``.
        hf_model_path: Forwarded unchanged to ``transcribe``.
        aai_api_key: Forwarded unchanged to ``transcribe``.

    Returns:
        A list of ``[start, end, transcript]`` entries, one per segment that
        was transcribed successfully. A segment whose transcription raised is
        reported on stdout and left out of the result.

    Raises:
        Errors from loading the audio or exporting a clip are not caught here
        and propagate to the caller.
    """
    audio = AudioSegment.from_file(file_name, format="wav")

    folder_name = "segments"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_name, exist_ok=True)

    # Only the base name goes into the clip name: a file_name that carries
    # directory components would otherwise point the clip into a sub-directory
    # of ``segments`` that does not exist, and the export would fail.
    base_name = os.path.basename(str(file_name))

    texts = []
    for index, segment in enumerate(segments, start=1):
        # Slice positions are given to the audio object in milliseconds.
        start_ms = segment[0] * 1000
        end_ms = segment[1] * 1000
        clip = audio[start_ms:end_ms]

        clip_path = os.path.join(folder_name, f"segment-{base_name}{index}.wav")
        clip.export(clip_path, format="wav")

        try:
            transcript = transcribe(clip_path, language, modelSize, model_type, quantization, custom_model_path, hf_model_path, aai_api_key)
        except Exception as err:
            # Best effort: one failing segment must not abort the others.
            print("ERROR while transcribing: ", err)
        else:
            # Result shape: [[start, end, transcript], [start, end, transcript], ...]
            texts.append([segment[0], segment[1], transcript])
        finally:
            # Remove the temporary clip on every exit path, including
            # interrupts that are not subclasses of Exception.
            try:
                os.remove(clip_path)
            except OSError as e:
                print (f'Access-error on file {str(e)}')

    return texts
|