@inproceedings{dadd51c204e3461ab7a28116fc34df91,
  title     = {Online Detection of Vocal Listener Responses with Maximum Latency Constraints},
  abstract  = {When human listeners utter Listener Responses (e.g. back-channels or acknowledgments) such as 'yeah' and 'mmhmm', interlocutors commonly continue to speak or resume their speech even before the listener has finished his/her response. This type of speech interactivity results in frequent speech overlap which is common in human-human conversation. To allow for this type of speech interactivity to occur between humans and spoken dialog systems, which will result in more human-like continuous and smoother human-machine interaction, we propose an on-line classifier which can classify incoming speech as Listener Responses. We show that it is possible to detect vocal Listener Responses using maximum latency thresholds of 100-500 ms, thereby obtaining equal error rates ranging from 34% to 28% by using an energy based voice activity detector.},
  keywords  = {METIS-277647, EC Grant Agreement nr.: FP7/231287, EWI-20186, IR-77316},
  author    = {Neiberg, Daniel and Truong, Khiet Phuong},
  year      = {2011},
  month     = may,
  doi       = {10.1109/ICASSP.2011.5947688},
  isbn      = {978-1-4577-0538-0},
  publisher = {IEEE},
  pages     = {5836--5839},
  booktitle = {Proceedings of {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  address   = {United States},
  note      = {IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2011; Conference date: 22-05-2011 Through 27-05-2011},
}