author = {Asma Rabaoui and Manuel Davy and St{\'e}phane Rossignol and Noureddine Ellouze},
title = {One-Class SVMs and Wavelets for Audio Surveillance},
journal = {IEEE Transactions on Information Forensics \& Security},
year = {2008},
abstract = { This paper presents a procedure aimed at recognizing environmental sounds for surveillance and security applications. We propose to apply One-Class Support Vector Machines (1-SVMs) together with a sophisticated dissimilarity measure as a discriminative framework in order to address audio classification, and hence, sound recognition. We illustrate the performance of this method on an audio database, which consists of 1015 sounds belonging to 9 classes. The used database presents high intra-class diversity in the signal properties and some kind of inter-class similarities. The number of items in each class is deliberately not equal, and sometimes very different which results in conducting experiments that simulate non-uniform probability of sound appearances. First, the use of a set of state-of-the-art audio features is studied. Then, we introduce a set of novel features obtained by combining elementary features. The experiments conducted on a multi-class classification problem show the superiority of this novel sound recognition method. The best recognition accuracy (96.89\%) is obtained when combining in the feature vector wavelet-based features, MFCCs and individual temporal and frequency features. Our 1-SVM approach overperforms the conventional HMM-based system in the conducted experiments, the improvement in the error rate can reach 50\%. Besides, we provide empirical results comparing the single-class SVM to a two-class SVM method. We discuss the superiority of the proposed methodologies and approaches based on 1-SVM addressing a multi-class problem. Moreover, the robustness to the environmental noise is investigated for specific types of acoustic representations. We showed that we can efficiently address a sound classification problem characterized by complex real-world datasets, even under important noise degradation conditions. }