% Journal article record (publisher export, cleaned up). The non-standard
% fields parent_itemid, publishercode, publication_date, itemtype and eissn
% are kept from the export; standard styles ignore field names they do not know.
@article{Zinn:2016:0266-4763:682,
  author           = {Zinn, S. and W{\"u}rbach, A.},
  title            = {A statistical approach to address the problem of heaping in self-reported income data},
  journal          = {Journal of Applied Statistics},
  year             = {2016},
  volume           = {43},
  number           = {4},
  pages            = {682--703},
  issn             = {0266-4763},
  eissn            = {1360-0532},
  doi              = {10.1080/02664763.2015.1077372},
  url              = {https://www.ingentaconnect.com/content/tandf/cjas/2016/00000043/00000004/art00007},
  keyword          = {heaping, self-reported income data, 62P25, 62D99, 62F10, German National Educational Panel Study, zero-inflated log-normal distribution, 62F30, 62F25},
  parent_itemid    = {infobike://tandf/cjas},
  publishercode    = {tandf},
  publication_date = {2016-03-11T00:00:00},
  itemtype         = {ARTICLE},
  abstract         = {Self-reported income information particularly suffers from an intentional coarsening of the data, which is called heaping or rounding. If it does not occur completely at random -- which is usually the case -- heaping and rounding have detrimental effects on the results of statistical analysis. Conventional statistical methods do not consider this kind of reporting bias, and thus might produce invalid inference. We describe a novel statistical modeling approach that allows us to deal with self-reported heaped income data in an adequate and flexible way. We suggest modeling heaping mechanisms and the true underlying model in combination. To describe the true net income distribution, we use the zero-inflated log-normal distribution. Heaping points are identified from the data by applying a heuristic procedure comparing a hypothetical income distribution and the empirical one. To determine heaping behavior, we employ two distinct models: either we assume piecewise constant heaping probabilities, or heaping probabilities are considered to increase steadily with proximity to a heaping point. We validate our approach by some examples. To illustrate the capacity of the proposed method, we conduct a case study using income data from the German National Educational Panel Study.},
}