% Chapter (pp. 271--300) with its own authors in an edited Springer volume,
% hence the incollection entry type; the containing book is given by booktitle/editor.
@incollection{c91ed76a78684137b40790b0b432014d,
  author    = {Holzinger, Andreas and Schantl, Johannes and Schroettner, Miriam and Seifert, Christin and Verspoor, Karin},
  title     = {Biomedical Text Mining: State-of-the-Art, Open Problems and Future Challenges},
  editor    = {Holzinger, Andreas and Jurisica, Igor},
  booktitle = {Interactive Knowledge Discovery and Data Mining in Biomedical Informatics},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  year      = {2014},
  pages     = {271--300},
  doi       = {10.1007/978-3-662-43968-5_16},
  isbn      = {978-3-662-43967-8},
  language  = {English},
  keywords  = {Big data, Knowledge discovery, LDA, LSA, Natural language processing, PCA, PLSA, SVM, Statistical models, Text classification, Text mining, Unstructured information, hLDA},
  abstract  = {Text is a very important type of data within the biomedical domain. For example, patient records contain large amounts of text which has been entered in a non-standardized format, consequently posing a lot of challenges to processing of such data. For the clinical doctor the written text in the medical findings is still the basis for decision making---neither images nor multimedia data. However, the steadily increasing volumes of unstructured information need machine learning approaches for data mining, i.e. text mining. This paper provides a short, concise overview of some selected text mining methods, focusing on statistical methods, i.e. Latent Semantic Analysis, Probabilistic Latent Semantic Analysis, Latent Dirichlet Allocation, Hierarchical Latent Dirichlet Allocation, Principal Component Analysis, and Support Vector Machines, along with some examples from the biomedical domain. Finally, we provide some open problems and future challenges, particularly from the clinical domain, that we expect to stimulate future research.},
}