29 lines
1.1 KiB
Python
29 lines
1.1 KiB
Python
import os
|
|
from speechlib import Transcriptor
|
|
|
|
# --- Transcription settings --------------------------------------------------
# These module-level names are passed positionally to Transcriptor later in
# the script, so their names and values must stay exactly as they are.

file = "obama_zach.wav"  # path of the audio file to transcribe

# Folder containing voice samples used for speaker recognition (empty here).
voices_folder = ""

language = "en"  # language code of the audio

log_folder = "logs"  # folder where transcripts are stored

# Model size to use; one of: tiny, small, medium, large-v1, large-v2, large-v3.
modelSize = "tiny"

# Setting this to True may speed up processing but lower the accuracy.
quantization = False

# Hugging Face access token (placeholder -- replace with a real token). The
# account needs permission to access pyannote/speaker-diarization@2.1.
ACCESS_TOKEN = "huggingface access token"
# Build the transcriptor from the settings defined earlier in this script.
# Note: quantization only works on faster-whisper.
transcriptor = Transcriptor(
    file,
    log_folder,
    language,
    modelSize,
    ACCESS_TOKEN,
    voices_folder,
    quantization,
)

# Run one backend. Normal whisper is the active choice; to try a different
# backend, comment this call out and uncomment one of the alternatives below.
res = transcriptor.whisper()

# faster-whisper (simply faster):
# res = transcriptor.faster_whisper()

# A custom trained whisper model, given the path to the model file:
# res = transcriptor.custom_whisper("D:/whisper_tiny_model/tiny.pt")

# A whisper model from Hugging Face, given its model id:
# res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base")

# AssemblyAI model, given an AssemblyAI API key.
# NOTE(review): the method name is spelled "assemby_ai_model" exactly as in
# the original script -- confirm against the installed speechlib version.
# res = transcriptor.assemby_ai_model("assemblyAI api key")