@inproceedings{rohatgi-etal-2023-acl,
title = "The {ACL} {OCL} Corpus: Advancing Open Science in Computational Linguistics",
author = "Rohatgi, Shaurya and
Qin, Yanxia and
Aw, Benjamin and
Unnithan, Niranjana and
Kan, Min-Yen",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.640/",
doi = "10.18653/v1/2023.emnlp-main.640",
pages = "10348--10361",
abstract = "We present ACL OCL, a scholarly corpus derived from the ACL Anthology to assist Open scientific research in the Computational Linguistics domain. Integrating and enhancing the previous versions of the ACL Anthology, the ACL OCL contributes metadata, PDF files, citation graphs and additional structured full texts with sections, figures, and links to a large knowledge resource (Semantic Scholar). The ACL OCL spans seven decades, containing 73K papers, alongside 210K figures. We spotlight how ACL OCL applies to observe trends in computational linguistics. By detecting paper topics with a supervised neural model, we note that interest in {\textquotedblleft}Syntax: Tagging, Chunking and Parsing{\textquotedblright} is waning and {\textquotedblleft}Natural Language Generation{\textquotedblright} is resurging. Our dataset is available from HuggingFace (https://huggingface.co/datasets/WINGNUS/ACL-OCL)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rohatgi-etal-2023-acl">
<titleInfo>
<title>The ACL OCL Corpus: Advancing Open Science in Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shaurya</namePart>
<namePart type="family">Rohatgi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxia</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Aw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niranjana</namePart>
<namePart type="family">Unnithan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present ACL OCL, a scholarly corpus derived from the ACL Anthology to assist Open scientific research in the Computational Linguistics domain. Integrating and enhancing the previous versions of the ACL Anthology, the ACL OCL contributes metadata, PDF files, citation graphs and additional structured full texts with sections, figures, and links to a large knowledge resource (Semantic Scholar). The ACL OCL spans seven decades, containing 73K papers, alongside 210K figures. We spotlight how ACL OCL applies to observe trends in computational linguistics. By detecting paper topics with a supervised neural model, we note that interest in “Syntax: Tagging, Chunking and Parsing” is waning and “Natural Language Generation” is resurging. Our dataset is available from HuggingFace (https://huggingface.co/datasets/WINGNUS/ACL-OCL).</abstract>
<identifier type="citekey">rohatgi-etal-2023-acl</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.640</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.640/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>10348</start>
<end>10361</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The ACL OCL Corpus: Advancing Open Science in Computational Linguistics
%A Rohatgi, Shaurya
%A Qin, Yanxia
%A Aw, Benjamin
%A Unnithan, Niranjana
%A Kan, Min-Yen
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F rohatgi-etal-2023-acl
%X We present ACL OCL, a scholarly corpus derived from the ACL Anthology to assist Open scientific research in the Computational Linguistics domain. Integrating and enhancing the previous versions of the ACL Anthology, the ACL OCL contributes metadata, PDF files, citation graphs and additional structured full texts with sections, figures, and links to a large knowledge resource (Semantic Scholar). The ACL OCL spans seven decades, containing 73K papers, alongside 210K figures. We spotlight how ACL OCL applies to observe trends in computational linguistics. By detecting paper topics with a supervised neural model, we note that interest in “Syntax: Tagging, Chunking and Parsing” is waning and “Natural Language Generation” is resurging. Our dataset is available from HuggingFace (https://huggingface.co/datasets/WINGNUS/ACL-OCL).
%R 10.18653/v1/2023.emnlp-main.640
%U https://aclanthology.org/2023.emnlp-main.640/
%U https://doi.org/10.18653/v1/2023.emnlp-main.640
%P 10348-10361
Markdown (Informal)
[The ACL OCL Corpus: Advancing Open Science in Computational Linguistics](https://aclanthology.org/2023.emnlp-main.640/) (Rohatgi et al., EMNLP 2023)
ACL