@inproceedings{krenn-etal-2024-germs,
title = "{GERMS}-{AT}: A Sexism/Misogyny Dataset of Forum Comments from an {A}ustrian Online Newspaper",
author = "Krenn, Brigitte and
Petrak, Johann and
Kubina, Marina and
Burger, Christian",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.683",
pages = "7728--7739",
abstract = "This paper presents a sexism/misogyny dataset extracted from comments of a large online forum of an Austrian newspaper. The comments are in Austrian German language, and in some cases interspersed with dialectal or English elements. We describe the data collection, the annotation guidelines and the annotation process resulting in a corpus of approximately 8 000 comments which were annotated with 5 levels of sexism/misogyny, ranging from 0 (not sexist/misogynist) to 4 (highly sexist/misogynist). The professional forum moderators (self-identified females and males) of the online newspaper were involved as experts in the creation of the annotation guidelines and the annotation of the user comments. In addition, we also describe first results of training transformer-based classification models for both binarized and original label classification of the corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krenn-etal-2024-germs">
<titleInfo>
<title>GERMS-AT: A Sexism/Misogyny Dataset of Forum Comments from an Austrian Online Newspaper</title>
</titleInfo>
<name type="personal">
<namePart type="given">Brigitte</namePart>
<namePart type="family">Krenn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johann</namePart>
<namePart type="family">Petrak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Kubina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Burger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a sexism/misogyny dataset extracted from comments of a large online forum of an Austrian newspaper. The comments are in Austrian German language, and in some cases interspersed with dialectal or English elements. We describe the data collection, the annotation guidelines and the annotation process resulting in a corpus of approximately 8 000 comments which were annotated with 5 levels of sexism/misogyny, ranging from 0 (not sexist/misogynist) to 4 (highly sexist/misogynist). The professional forum moderators (self-identified females and males) of the online newspaper were involved as experts in the creation of the annotation guidelines and the annotation of the user comments. In addition, we also describe first results of training transformer-based classification models for both binarized and original label classification of the corpus.</abstract>
<identifier type="citekey">krenn-etal-2024-germs</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.683</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>7728</start>
<end>7739</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GERMS-AT: A Sexism/Misogyny Dataset of Forum Comments from an Austrian Online Newspaper
%A Krenn, Brigitte
%A Petrak, Johann
%A Kubina, Marina
%A Burger, Christian
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F krenn-etal-2024-germs
%X This paper presents a sexism/misogyny dataset extracted from comments of a large online forum of an Austrian newspaper. The comments are in Austrian German language, and in some cases interspersed with dialectal or English elements. We describe the data collection, the annotation guidelines and the annotation process resulting in a corpus of approximately 8 000 comments which were annotated with 5 levels of sexism/misogyny, ranging from 0 (not sexist/misogynist) to 4 (highly sexist/misogynist). The professional forum moderators (self-identified females and males) of the online newspaper were involved as experts in the creation of the annotation guidelines and the annotation of the user comments. In addition, we also describe first results of training transformer-based classification models for both binarized and original label classification of the corpus.
%U https://aclanthology.org/2024.lrec-main.683
%P 7728-7739
Markdown (Informal)
[GERMS-AT: A Sexism/Misogyny Dataset of Forum Comments from an Austrian Online Newspaper](https://aclanthology.org/2024.lrec-main.683) (Krenn et al., LREC-COLING 2024)
ACL