% Conference paper (ACM Multimedia, MM 2014) on visual-only native vs. non-native accent classification.
% `address` holds the publisher's location, not the conference venue; percent signs in the abstract are escaped for LaTeX.
@inproceedings{53138d0c6f4b4f6d881f35453b68f5a0,
  author    = {Georgakis, Christos and Petridis, Stavros and Pantic, Maja},
  title     = {Discriminating native from non-native speech using fusion of visual cues},
  booktitle = {Proceedings of the {ACM} International Conference on Multimedia, {MM} 2014},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2014},
  month     = nov,
  pages     = {1177--1180},
  doi       = {10.1145/2647868.2655026},
  isbn      = {978-1-4503-3063-3},
  language  = {english},
  keywords  = {HMI-HF: Human Factors, EWI-25811, EC Grant Agreement nr.: FP7/611153, EC Grant Agreement nr.: FP7/2007-2013, METIS-309930, Non-Native Speech, Visual Speech Processing, Visual-only Accent Classification, IR-94678, Foreign Accent Detection},
  abstract  = {The task of classifying accent, as belonging to a native language speaker or a foreign language speaker, has been so far addressed by means of the audio modality only. However, features extracted from the visual modality have been successfully used to extend or substitute audio-only approaches developed for speech or language recognition. This paper presents a fully automated approach to discriminating native from non-native speech in English, based exclusively on visual appearance features from speech. Long Short-Term Memory Neural Networks (LSTMs) are employed to model accent-related speech dynamics and yield accent-class predictions. Subject-independent experiments are conducted on speech episodes captured by mobile phones from the challenging MOBIO Database. We establish a text-dependent scenario, using only those recordings in which all subjects read the same paragraph. Our results show that decision level fusion of networks trained with complementary appearance descriptors consistently leads to performance improvement over single-feature systems, with the highest gain in accuracy reaching 7.3\%. The best feature combinations achieve classification accuracy of 75\%, rendering the proposed method a useful accent classification tool in cases of missing or noisy audio stream.},
  note      = {eemcs-eprint-25811 ; 22nd ACM Multimedia Conference, MM 2014 ; Conference date: 03-11-2014 Through 07-11-2014},
}