#!/usr/bin/env python # # Copyright (c) 2013 Tanel Alumae # # Slightly inspired by the CMU Sphinx's Pocketsphinx Gstreamer plugin demo (which has BSD license) # # Apache 2.0 import sys import os import gi gi.require_version('Gst', '1.0') from gi.repository import GObject, Gst, Gtk, Gdk GObject.threads_init() Gdk.threads_init() Gst.init(None) class DemoApp(object): """GStreamer/Kaldi Demo Application""" def __init__(self): """Initialize a DemoApp object""" self.init_gui() self.init_gst() def init_gui(self): """Initialize the GUI components""" self.window = Gtk.Window() self.window.connect("destroy", self.quit) self.window.set_default_size(400,200) self.window.set_border_width(10) vbox = Gtk.VBox() self.text = Gtk.TextView() self.textbuf = self.text.get_buffer() self.text.set_wrap_mode(Gtk.WrapMode.WORD) vbox.pack_start(self.text, True, True, 1) self.button = Gtk.Button("Speak") self.button.connect('clicked', self.button_clicked) vbox.pack_start(self.button, False, False, 5) self.window.add(vbox) self.window.show_all() def quit(self, window): Gtk.main_quit() def init_gst(self): """Initialize the speech components""" self.pulsesrc = Gst.ElementFactory.make("pulsesrc", "pulsesrc") if self.pulsesrc == None: print >> sys.stderr, "Error loading pulsesrc GST plugin. You probably need the gstreamer1.0-pulseaudio package" sys.exit() self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert") self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample") self.asr = Gst.ElementFactory.make("onlinegmmdecodefaster", "asr") self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink") if self.asr: model_dir = "online-data/models/tri2b_mmi/" if not os.path.isdir(model_dir): print >> sys.stderr, "Model (%s) not downloaded. Run run-simulated.sh first" % model_dir sys.exit(1) self.asr.set_property("fst", model_dir + "HCLG.fst") self.asr.set_property("lda-mat", model_dir + "matrix") self.asr.set_property("model", model_dir + "model") self.asr.set_property("word-syms", model_dir + "words.txt") self.asr.set_property("silence-phones", "1:2:3:4:5") self.asr.set_property("max-active", 4000) self.asr.set_property("beam", 12.0) self.asr.set_property("acoustic-scale", 0.0769) else: print >> sys.stderr, "Couldn't create the onlinegmmfasterdecoder element. " if os.environ.has_key("GST_PLUGIN_PATH"): print >> sys.stderr, "Have you compiled the Kaldi GStreamer plugin?" else: print >> sys.stderr, "You probably need to set the GST_PLUGIN_PATH envoronment variable" print >> sys.stderr, "Try running: GST_PLUGIN_PATH=../../../src/gst-plugin %s" % sys.argv[0] sys.exit(); # initially silence the decoder self.asr.set_property("silent", True) self.pipeline = Gst.Pipeline() for element in [self.pulsesrc, self.audioconvert, self.audioresample, self.asr, self.fakesink]: self.pipeline.add(element) self.pulsesrc.link(self.audioconvert) self.audioconvert.link(self.audioresample) self.audioresample.link(self.asr) self.asr.link(self.fakesink) self.asr.connect('hyp-word', self._on_word) self.pipeline.set_state(Gst.State.PLAYING) def _on_word(self, asr, word): Gdk.threads_enter() if word == "<#s>": self.textbuf.insert_at_cursor("\n") else: self.textbuf.insert_at_cursor(word) self.textbuf.insert_at_cursor(" ") Gdk.threads_leave() def button_clicked(self, button): """Handle button presses.""" if button.get_label() == "Speak": button.set_label("Stop") self.asr.set_property("silent", False) else: button.set_label("Speak") self.asr.set_property("silent", True) if __name__ == '__main__': app = DemoApp() print ''' The (bigram) language model used to build the decoding graph was estimated on an audio book's text. The text in question is King Solomon's Mines" (http://www.gutenberg.org/ebooks/2166). You may want to read some sentences from this book first ...''' Gtk.main()