% Workshop paper on cleaning Web client and proxy usage logs.
% Exporter placeholders with no bibliographic content were dropped from this
% entry: pages "--", isbn "not assigned", language "Undefined". Add the real
% values here if they become known.
% Names are stored in the "Last, First" form so parsing does not depend on case.
@inproceedings{0f842e2874604915bbfae7ed5f944e66,
  author    = {Weinreich, H. and Obendorf, H. and Herder, E.},
  title     = {Data Cleaning Methods for Client and Proxy Logs},
  booktitle = {Workshop on Logging Traces of Web Activity: The Mechanics of Data Collection},
  editor    = {Edmonds, A. and Hawkey, K. and Kellar, M. and Turnbull, D.},
  publisher = {Dalhousie University},
  year      = {2006},
  month     = may,
  day       = {23},
  number    = {We.C1.3},
  keywords  = {EWI-9155, HMI-IE: Information Engineering, IR-66893, METIS-237939, HMI-HF: Human Factors},
  abstract  = {In this paper we present our experiences with the cleaning of Web client and proxy usage logs, based on a long-term browsing study with 25 participants. A detailed clickstream log, recorded using a Web intermediary, was combined with a second log of user interface actions, which was captured by a modified Firefox browser for a subset of the participants. The consolidated data from both records revealed many page requests that were not directly related to user actions. For participants who had no ad-filtering system installed, these artifacts made up one third of all transferred Web pages. Three major reasons could be identified: HTML Frames and iFrames, advertisements, and automatic page reloads. The experiences made during the data cleaning process might help other researchers to choose adequate filtering methods for their data.},
}