% Conference paper on Branchformer-based end-to-end speaker diarization (O-COCOSDA 2024).
% Conventions used in this entry:
%   - author/editor names are stored in "Last, First" form with plain (unescaped)
%     given names, so bibliography styles can parse and abbreviate them.
%   - acronyms and the proper noun in the title are brace-protected so that
%     sentence-casing styles do not lowercase them.
%   - the proceedings title is stored once, in booktitle; no series field is set
%     because the only available value was a copy of booktitle.
%   - the event name and conference dates are kept in the note field.
% NOTE(review): the editor surname "Taso" may be a transposition of "Tsao";
% confirm against the proceedings front matter before changing it.
@inproceedings{2467634029c347b3aa1da98bf86a9900,
  author    = {Lee, Pei Ying and Guo, Hau Yun and Lo, Tien Hong and Chen, Berlin},
  title     = {Exploring {Branchformer}-Based End-to-End Speaker Diarization with Speaker-Wise {VAD} Loss},
  booktitle = {2024 27th Conference on the Oriental {COCOSDA} International Committee for the Co-Ordination and Standardisation of Speech Databases and Assessment Techniques, {O-COCOSDA} 2024 - Proceedings},
  editor    = {Su, Ming-Hsiang and Yeh, Jui-Feng and Liao, Yuan-Fu and Lee, Chi-Chun and Taso, Yu},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2024},
  doi       = {10.1109/O-COCOSDA64382.2024.10800120},
  language  = {English},
  keywords  = {auxiliary loss, branchformer, end-to-end neural diarization, multi-head attention, speaker diarization},
  abstract  = {Speaker diarization involves partitioning an audio stream into segments according to the identity of the speaker. The encoder-decoder based attractors for the end-to-end neural diarization (EEND-EDA) model can handle overlapping speech and has shown promising performance compared to traditional methods. However, EEND-EDA fails to identify the number of speakers accurately. To address this limitation, we first replace the Transformer encoder in EEND-EDA with the Branchformer encoder. Additionally, we introduce speaker-wise VAD Loss (SAD Loss) to the self-attention mechanism of the Branchformer encoder, thereby improving the model's ability to distinguish different speakers. Extensive experimental results on the Mini-Librispeech and simulated dataset Sim2spk benchmark dataset suggest that our approach outperforms existing strong baselines by a substantial margin, achieving a significant improvement of more than 15\% Diarization Error Rate (DER). We will release the source code on GitHub for future research.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 27th Conference on the Oriental COCOSDA International Committee for the Co-Ordination and Standardisation of Speech Databases and Assessment Techniques, O-COCOSDA 2024 ; Conference date: 17-10-2024 Through 19-10-2024},
}