@inproceedings{tan-etal-2021-reliability,
title = "Reliability Testing for Natural Language Processing Systems",
author = "Tan, Samson and
Joty, Shafiq and
Baxter, Kathy and
Taeihagh, Araz and
Bennett, Gregory A. and
Kan, Min-Yen",
editor = "Zong, Chengqing and
Xia, Fei and
Li, Wenjie and
Navigli, Roberto",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.acl-long.321/",
doi = "10.18653/v1/2021.acl-long.321",
pages = "4153--4169",
abstract = "Questions of fairness, robustness, and transparency are paramount to address before deploying NLP systems. Central to these concerns is the question of reliability: Can NLP systems reliably treat different demographics fairly and function correctly in diverse and noisy environments? To address this, we argue for the need for reliability testing and contextualize it among existing work on improving accountability. We show how adversarial attacks can be reframed for this goal, via a framework for developing reliability tests. We argue that reliability testing {---} with an emphasis on interdisciplinary collaboration {---} will enable rigorous and targeted testing, and aid in the enactment and enforcement of industry standards."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tan-etal-2021-reliability">
<titleInfo>
<title>Reliability Testing for Natural Language Processing Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samson</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shafiq</namePart>
<namePart type="family">Joty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathy</namePart>
<namePart type="family">Baxter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Araz</namePart>
<namePart type="family">Taeihagh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gregory</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Bennett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjie</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Navigli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Questions of fairness, robustness, and transparency are paramount to address before deploying NLP systems. Central to these concerns is the question of reliability: Can NLP systems reliably treat different demographics fairly and function correctly in diverse and noisy environments? To address this, we argue for the need for reliability testing and contextualize it among existing work on improving accountability. We show how adversarial attacks can be reframed for this goal, via a framework for developing reliability tests. We argue that reliability testing — with an emphasis on interdisciplinary collaboration — will enable rigorous and targeted testing, and aid in the enactment and enforcement of industry standards.</abstract>
<identifier type="citekey">tan-etal-2021-reliability</identifier>
<identifier type="doi">10.18653/v1/2021.acl-long.321</identifier>
<location>
<url>https://aclanthology.org/2021.acl-long.321/</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>4153</start>
<end>4169</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reliability Testing for Natural Language Processing Systems
%A Tan, Samson
%A Joty, Shafiq
%A Baxter, Kathy
%A Taeihagh, Araz
%A Bennett, Gregory A.
%A Kan, Min-Yen
%Y Zong, Chengqing
%Y Xia, Fei
%Y Li, Wenjie
%Y Navigli, Roberto
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F tan-etal-2021-reliability
%X Questions of fairness, robustness, and transparency are paramount to address before deploying NLP systems. Central to these concerns is the question of reliability: Can NLP systems reliably treat different demographics fairly and function correctly in diverse and noisy environments? To address this, we argue for the need for reliability testing and contextualize it among existing work on improving accountability. We show how adversarial attacks can be reframed for this goal, via a framework for developing reliability tests. We argue that reliability testing — with an emphasis on interdisciplinary collaboration — will enable rigorous and targeted testing, and aid in the enactment and enforcement of industry standards.
%R 10.18653/v1/2021.acl-long.321
%U https://aclanthology.org/2021.acl-long.321/
%U https://doi.org/10.18653/v1/2021.acl-long.321
%P 4153-4169
Markdown (Informal)
[Reliability Testing for Natural Language Processing Systems](https://aclanthology.org/2021.acl-long.321/) (Tan et al., ACL-IJCNLP 2021)
ACL
- Samson Tan, Shafiq Joty, Kathy Baxter, Araz Taeihagh, Gregory A. Bennett, and Min-Yen Kan. 2021. Reliability Testing for Natural Language Processing Systems. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pages 4153–4169, Online. Association for Computational Linguistics.