% Conference paper in the LREC 2008 proceedings.
% The exported "series" field only repeated "booktitle" verbatim, so it is
% omitted here to keep styles from printing the proceedings title twice.
% All author names use the unambiguous "Last, First" form.
@inproceedings{5de1fbd25a7e40b4b12edaaca2cf8258,
  author    = {Huang, {Chu Ren} and Lee, {Lung Hao} and Qu, {Wei Guang} and Hong, {Jia Fei} and Yu, Shiwen},
  title     = {Quality assurance of automatic annotation of very large corpora: A study based on heterogeneous tagging systems},
  booktitle = {Proceedings of the 6th International Conference on Language Resources and Evaluation, {LREC} 2008},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2008},
  pages     = {2725--2729},
  language  = {English},
  note      = {6th International Conference on Language Resources and Evaluation, LREC 2008 ; Conference date: 28-05-2008 Through 30-05-2008},
  abstract  = {We propose a set of heuristics for improving annotation quality of very large corpora efficiently. The Xinhua News portion of the Chinese Gigaword Corpus was tagged independently with both the Peking University ICL tagset and the Academia Sinica CKIP tagset. The corpus-based POS tags mapping will serve as the basis of the possible contrast in grammatical systems between PRC and Taiwan. And it can serve as the basic model for mapping between the CKIP and ICL tagging systems for any data.},
}