@inproceedings{e462d8576b5f40a2822e823540cf3297,
  author    = {Wang, Jun and Hahm, Seongjun and Mau, Ted},
  title     = {Determining an Optimal Set of Flesh Points on Tongue, Lips, and Jaw for Continuous Silent Speech Recognition},
  booktitle = {SLPAT 2015 - 6th Workshop on Speech and Language Processing for Assistive Technologies, Proceedings},
  series    = {SLPAT 2015 - 6th Workshop on Speech and Language Processing for Assistive Technologies, Proceedings},
  editor    = {Alexandersson, Jan and Altinsoy, Ercan and Christensen, Heidi and Ljungl{\"o}f, Peter and Portet, Fran{\c{c}}ois and Rudzicz, Frank},
  publisher = {Association for Computational Linguistics (ACL)},
  month     = sep,
  year      = {2015},
  pages     = {79--85},
  keywords  = {Articulation, Deep neural network, Dysarthria, Electromagnetic articulograph, Hidden Markov model, Silent speech recognition},
  abstract  = {Articulatory data have gained increasing interest in speech recognition with or without acoustic data. Electromagnetic articulograph (EMA) is one of the affordable, currently used techniques for tracking the movement of flesh points on articulators (e.g., tongue) during speech. Determining an optimal set of sensors is important for optimizing the clinical applications of EMA data, due to the inconvenience of attaching sensors on tongue and other intraoral articulators, particularly for patients with neurological diseases. A recent study found an optimal set (tongue tip and body back, upper and lower lips) on tongue and lips for isolated phoneme, word, or short phrase classification from articulatory movement data. This four-sensor set, however, has not been verified in continuous silent speech recognition. In this paper, we investigated the use of data from sensor combinations in continuous speech recognition to verify the finding using a publicly available data set MOCHA-TIMIT. The long-standing speech recognition approach Gaussian mixture model (GMM)-hidden Markov model (HMM) and a recently available approach deep neural network (DNN)-HMM were used as the recognizers. Experimental results confirmed that the four-sensor set is optimal out of the full set of sensors on tongue, lips, and jaw. Adding upper incisor and/or velum data further improved the recognition performance slightly.},
  note      = {Funding Information: This work was supported by the National Institutes of Health (NIH) through grants R03 DC013990 and R01 DC013547. We would like to thank Dr. Jordan R. Green, Dr. Ashok Samal, and the support from the Communication Technology Center, University of Texas at Dallas. Publisher Copyright: {\textcopyright} SLPAT 2015 - 6th Workshop on Speech and Language Processing for Assistive Technologies, Proceedings.; 6th Workshop on Speech and Language Processing for Assistive Technologies, SLPAT 2015 ; Conference date: 11-09-2015},
  language  = {English (US)},
  address   = {United States},
}