% Conference paper on Adaptive-FSN (monaural speech enhancement).
% Acronyms in the title and booktitle are brace-protected so that
% sentence-casing styles keep their capitalisation. All author names use the
% unambiguous "Last, First" form. No series field is given: the proceedings
% name lives in booktitle only, so styles do not print it twice.
@inproceedings{6ec3ee6548544700be198cce20797daf,
  author    = {Tsao, {Yu Sheng} and Ho, {Kuan Hsun} and Hung, {Jeih Weih} and Chen, Berlin},
  title     = {{Adaptive-FSN}: Integrating Full-Band Extraction and Adaptive Sub-Band Encoding for Monaural Speech Enhancement},
  booktitle = {2022 {IEEE} Spoken Language Technology Workshop, {SLT} 2022 - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {458--464},
  year      = {2023},
  doi       = {10.1109/SLT54892.2023.10023439},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 2022 IEEE Spoken Language Technology Workshop, SLT 2022 ; Conference date: 09-01-2023 Through 12-01-2023},
  keywords  = {FullSubNet, Speech enhancement, complex spectrum, real-time computation, sub-band processing},
  language  = {English},
  abstract  = {An important more recent thread of speech enhancement work is to utilize fine-grinded local spectral patterns with sub-band processing that complement full-band features nicely. To extend the efficacy of sub-band spectral information, we propose Adaptive-FSN, a fully convolutional real-time speech enhancement framework, to dynamically acquire a sub-band embedding within a wide range of sub-band frequencies. We exploit an adaptive subband encoder to portray sub-band processing that encapsulates a wide range of sub-band units. Then we build this effective sub-band embedding with a Conformer-based structure and multi-view attention. As for the full-band features, we make use of the FullSubNet+ architecture with its full-band extractor to get global spectral information. Finally, a Conformer-based fusion model combines the above information sources to predict the complex ideal ratio mask (cIRM). Experimental results on the VoiceBank-DEMAND benchmark task reveal that this novel framework outperforms FullSubNet+ by promoting the quality of processed utterances and reducing the implementation complexity for faster real-time computation.},
}