@article{8617a539fb564f7a80c297135ec50fe5,
title = "Deep learning-based real-time multiple-person action recognition system",
abstract = "Action recognition has gained great attention in automatic video analysis, greatly reducing the cost of human resources for smart surveillance. Most methods, however, focus on the detection of only one action event for a single person in a well-segmented video, rather than the recognition of multiple actions performed by more than one person at the same time for an untrimmed video. In this paper, we propose a deep learning-based multiple-person action recognition system for use in various real-time smart surveillance applications. By capturing a video stream of the scene, the proposed system can detect and track multiple people appearing in the scene and subsequently recognize their actions. Thanks to high resolution of the video frames, we establish a zoom-in function to obtain more satisfactory action recognition results when people in the scene become too far from the camera. To further improve the accuracy, recognition results from inflated 3D ConvNet (I3D) with multiple sliding windows are processed by a nonmaximum suppression (NMS) approach to obtain a more robust decision. Experimental results show that the proposed method can perform multiple-person action recognition in real time suitable for applications such as long-term care environments.",
keywords = "Action recognition, Deep learning, Human tracking, Smart surveillance",
author = "Tsai, {Jen Kai} and Hsu, {Chen Chien} and Wang, {Wei Yen} and Huang, {Shao Kang}",
note = "Funding Information: Author Contributions: Conceptualization, J.-K.T. and C.-C.H.; methodology, J.-K.T.; software, J.-K.T.; validation, Jv.-aKli.dTa.,tiwonri,tiJn.-gK—.To.,riwgirnitainl gd—raoftripgrienpaal rdartaioftn,pJr.e-Kpa.Tr.a;twiorni,t iJn.-gK—.Tr.e;vwierwitinang—d eredvitiienwg ,aJn.-dK.eTd.,itCin.-gC, .HJ.-.K, a.Tn.d, CS.-CK..HH..,; and S.-K.H.; visualization, J.-K.T.; supervision, C.-C.H. and W.-Y.W.; project administration, C.-C.H.; funding C.-C.H. and W.-Y.W. All authors have read and agreed to the published version of the manuscript. acquisition, C.-C.H. and W.-Y.W. All authors have read and agreed to the published version of the manuscript. Funding: This research was funded by the Chinese Language and Technology Center of National Taiwan Normal Funding: This research was funded by the Chinese Language and Technology Center of National Taiwan Normal University (NTNU) from The Featured Areas Research Center Program within the Framework of the THaiigwhaenr ,EudnudceartiGorna nStpsrnouo.t MPrOojSeTct 1b0y9 -t2h6e3 4M-Fin-0i0st3r-y00o6f aEndduMcaOtioSnT (1M09O-2E6)3 i4n- FT-a0i0w3-a0n0,7 atnhdro MuginhiPsterryvoasf iSvceieAnrcteif iacniadl Technology, Taiwan, under Grants no. MOST 109-2634-F-003-006 and MOST 109-2634-F-003-007 through Pervasive Artificial Intelligence Research (PAIR) Labs. Funding Information: This research was funded by the Chinese Language and Technology Center of National Taiwan Normal University (NTNU) from The Featured Areas Research Center Program within the Framework of the Higher Education Sprout Project by the Ministry of Education (MOE) in Taiwan, and Ministry of Science and Technology, Taiwan, under Grants no. MOST 109-2634-F-003-006 and MOST 109-2634-F-003-007 through Pervasive Artificial Intelligence Research (PAIR) Labs. Publisher Copyright: {\textcopyright} 2020 by the authors. Licensee MDPI, Basel, Switzerland.",
year = "2020",
month = sep,
day = "1",
doi = "10.3390/s20174758",
language = "English",
volume = "20",
pages = "1--17",
journal = "Sensors (Switzerland)",
issn = "1424-8220",
publisher = "Multidisciplinary Digital Publishing Institute (MDPI)",
number = "17",
}