% ROCLING 2015 conference paper (Chinese-language title, English abstract).
% Names are stored uniformly as "Last, First". The proceedings title lives only
% in booktitle; it was previously repeated in a series field, which made
% standard styles print it twice.
% NOTE(review): the note field is exporter boilerplate that most styles will
% print verbatim -- consider trimming it. The language value is a literal
% string, not a babel/polyglossia identifier -- confirm against the target style.
@inproceedings{c4a528c08c9743d5b0a1e0abdfde401d,
  author    = {Chen, {Ssu Cheng} and Hung, {Hsiao Tsung} and Chen, Berlin and Chen, {Kuan Yu}},
  title     = {使用詞向量表示與概念資訊於中文大詞彙連續語音辨識之語言模型調適},
  booktitle = {Proceedings of the 27th Conference on Computational Linguistics and Speech Processing, {ROCLING} 2015},
  editor    = {Chen, Sin-Horng and Wang, Hsin-Min and Chien, Jen-Tzung and Kao, Hung-Yu and Chang, Wen-Whei and Wang, Yih-Ru and Wu, Shih-Hung},
  publisher = {The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)},
  pages     = {4--17},
  year      = {2015},
  month     = oct,
  day       = {1},
  language  = {繁體中文},
  keywords  = {Concept language model, Deep learning, Language modeling, Speech recognition, Word representation},
  abstract  = {Research on deep learning has experienced a surge of interest in recent years. Alongside the rapid development of deep learning related technologies, various distributed representation methods have been proposed to embed the words of a vocabulary as vectors in a lower-dimensional space. Based on the distributed representations, it is anticipated to discover the semantic relationship between any pair of words via some kind of similarity computation of the associated word vectors. With the above background, this article explores a novel use of distributed representations of words for language modeling (LM) in speech recognition. Firstly, word vectors are employed to represent the words in the search history and the upcoming words during the speech recognition process, so as to dynamically adapt the language model on top of such vector representations. Second, we extend the recently proposed concept language model (CLM) by conduct relevant training data selection in the sentence level instead of the document level. By doing so, the concept classes of CLM can be more accurately estimated while simultaneously eliminating redundant or irrelevant information. On the other hand, since the resulting concept classes need to be dynamically selected and linearly combined to form the CLM model during the speech recognition process, we determine the relatedness of each concept class to the test utterance based the word representations derived with either the continue bag-of-words model (CBOW) or the skip-gram model (Skip-gram). Finally, we also combine the above LM methods for better speech recognition performance. Extensive experiments carried out on the MATBN (Mandarin Across Taiwan Broadcast News) corpus demonstrate the utility of our proposed LM methods in relation to several well-practiced baselines.},
  note      = {Publisher Copyright: {\textcopyright} Proceedings of the 27th Conference on Computational Linguistics and Speech Processing, ROCLING 2015.; 27th Conference on Computational Linguistics and Speech Processing, ROCLING 2015 ; Conference date: 01-10-2015 Through 02-10-2015},
}