# Transcribe a single audio file with Whisper and print the recognized text.
import whisper

# Load the Whisper model registered under the name "base".
model = whisper.load_model("base")

# Run transcription on the audio file; the path is resolved relative to the
# current working directory.
result = model.transcribe("audio.mp3")

# The transcription result is indexed by key; "text" holds the transcript
# that this script reports.
print(result["text"])