% Conference paper in the ROCLING 2020 proceedings (Chinese-language title,
% English abstract). The proceedings title is stored once, in booktitle.
@inproceedings{539140eb91cd4a4da34599f7ccfa4851,
  author    = {Chiu, {Shih Hsuan} and Chen, Berlin},
  title     = {新穎基於預訓練語言表示模型於語音辨識重新排序之研究},
  booktitle = {ROCLING 2020 - 32nd Conference on Computational Linguistics and Speech Processing},
  editor    = {Wang, Jenq-Haur and Lai, Ying-Hui and Lee, Lung-Hao and Chen, Kuan-Yu and Lee, Hung-Yi and Lee, Chi-Chun and Wang, Syu-Siang and Huang, Hen-Hsen and Liu, Chuan-Ming},
  pages     = {148--162},
  publisher = {The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)},
  year      = {2020},
  language  = {繁體中文},
  keywords  = {Automatic Speech Recognition, BERT, Language Models, N-best Lists Reranking},
  abstract  = {This paper proposes two BERT-based models for accurately rescoring (reranking) N-best speech recognition hypothesis lists. Reranking the N-best hypothesis lists decoded from the acoustic model has been proven to improve the performance in a two-stage automatic speech recognition (ASR) systems. However, with the rise of pre-trained contextualized language models, they have achieved state-of-the-art performance in many NLP applications, but there is a dearth of work on investigating its effectiveness in ASR. In this paper, we develop simple yet effective methods for improving ASR by reranking the N-best hypothesis lists leveraging BERT (bidirectional encoder representations from Transformers). Specifically, we treat reranking N-best hypotheses as a downstream task by simply fine-tuning the pre-trained BERT. We proposed two BERT-based reranking language models: (1) uniBERT: ideal unigram elicited from a given N-best list taking advantage of BERT to assist a LSTMLM, (2) classBERT: treating the N-best lists reranking as a multi-class classification problem. These models attempt to harness the power of BERT to reranking the N-best hypothesis lists generated in the ASR initial pass. Experiments on the benchmark AMI dataset show that the proposed reranking methods outperform the baseline LSTMLM which is a strong and widely-used competitor with 3.14\% improvement in word error rate (WER).},
  note      = {Publisher Copyright: {\textcopyright} ROCLING 2020. All rights reserved.; 32nd Conference on Computational Linguistics and Speech Processing, ROCLING 2020 ; Conference date: 24-09-2020 Through 26-09-2020},
}