@inproceedings{alonso-etal-2024-pixt3,
title = "{P}ix{T}3: Pixel-based Table-To-Text Generation",
author = "Alonso, I{\~n}igo and
Agirre, Eneko and
Lapata, Mirella",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-long.364",
doi = "10.18653/v1/2024.acl-long.364",
pages = "6721--6736",
abstract = "Table-to-text generation involves generating appropriate textual descriptions given structured tabular data. It has attracted increasing attention in recent years thanks to the popularity of neural network models and the availability of large-scale datasets. A common feature across existing methods is their treatment of the input as a string, i.e., by employing linearization techniques that do not always preserve information in the table, are verbose, and lack space efficiency. We propose to rethink data-to-text generation as a visual recognition task, removing the need for rendering the input in a string format. We present PixT3, a multimodal table-to-text model that overcomes the challenges of linearization and input size limitations encountered by existing models. PixT3 is trained with a new self-supervised learning objective to reinforce table structure awareness and is applicable to open-ended and controlled generation settings. Experiments on the ToTTo and Logic2Text benchmarks show that PixT3 is competitive and, in some settings, superior to generators that operate solely on text.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alonso-etal-2024-pixt3">
<titleInfo>
<title>PixT3: Pixel-based Table-To-Text Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iñigo</namePart>
<namePart type="family">Alonso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eneko</namePart>
<namePart type="family">Agirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mirella</namePart>
<namePart type="family">Lapata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Table-to-text generation involves generating appropriate textual descriptions given structured tabular data. It has attracted increasing attention in recent years thanks to the popularity of neural network models and the availability of large-scale datasets. A common feature across existing methods is their treatment of the input as a string, i.e., by employing linearization techniques that do not always preserve information in the table, are verbose, and lack space efficiency. We propose to rethink data-to-text generation as a visual recognition task, removing the need for rendering the input in a string format. We present PixT3, a multimodal table-to-text model that overcomes the challenges of linearization and input size limitations encountered by existing models. PixT3 is trained with a new self-supervised learning objective to reinforce table structure awareness and is applicable to open-ended and controlled generation settings. Experiments on the ToTTo and Logic2Text benchmarks show that PixT3 is competitive and, in some settings, superior to generators that operate solely on text.</abstract>
<identifier type="citekey">alonso-etal-2024-pixt3</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.364</identifier>
<location>
<url>https://aclanthology.org/2024.acl-long.364</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>6721</start>
<end>6736</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PixT3: Pixel-based Table-To-Text Generation
%A Alonso, Iñigo
%A Agirre, Eneko
%A Lapata, Mirella
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F alonso-etal-2024-pixt3
%X Table-to-text generation involves generating appropriate textual descriptions given structured tabular data. It has attracted increasing attention in recent years thanks to the popularity of neural network models and the availability of large-scale datasets. A common feature across existing methods is their treatment of the input as a string, i.e., by employing linearization techniques that do not always preserve information in the table, are verbose, and lack space efficiency. We propose to rethink data-to-text generation as a visual recognition task, removing the need for rendering the input in a string format. We present PixT3, a multimodal table-to-text model that overcomes the challenges of linearization and input size limitations encountered by existing models. PixT3 is trained with a new self-supervised learning objective to reinforce table structure awareness and is applicable to open-ended and controlled generation settings. Experiments on the ToTTo and Logic2Text benchmarks show that PixT3 is competitive and, in some settings, superior to generators that operate solely on text.
%R 10.18653/v1/2024.acl-long.364
%U https://aclanthology.org/2024.acl-long.364
%U https://doi.org/10.18653/v1/2024.acl-long.364
%P 6721-6736
Markdown (Informal)
[PixT3: Pixel-based Table-To-Text Generation](https://aclanthology.org/2024.acl-long.364) (Alonso et al., ACL 2024)
ACL
- Iñigo Alonso, Eneko Agirre, and Mirella Lapata. 2024. PixT3: Pixel-based Table-To-Text Generation. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 6721–6736, Bangkok, Thailand. Association for Computational Linguistics.