tdf#159483 sc HTML paste: handle data-sheets- attributes on a span

When copying multiple cells, including a formula, from Google Sheets to
Calc, the formula is handled as a formula by the HTML paste. When doing
the same for a single cell, only the result is pasted.

The trouble is that the data-sheets-* attributes appear on <td> elements
for multiple cells, but they appear on a <span> for a single cell.

Fix the problem by extending ScHTMLLayoutParser::ProcToken() to handle
the HtmlTokenId::SPAN_ON token and share the code between the <td> and
<span> handler, so this markup works in both cases.

Note that this is the paste handler; no changes to the normal HTML
import are made for now.

Change-Id: Id749df9062d8fcb9a2f0acd928585a304efaae28
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163291
Reviewed-by: Miklos Vajna <[email protected]>
Tested-by: Jenkins
diff --git a/sc/qa/filter/html/data/single-cell.html b/sc/qa/filter/html/data/single-cell.html
new file mode 100644
index 0000000..0b5613f
--- /dev/null
+++ b/sc/qa/filter/html/data/single-cell.html
@@ -0,0 +1 @@
<span style="font-size:10pt;font-family:Arial;font-style:normal;text-align:right;" data-sheets-root="1" data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:3}" data-sheets-formula="=SUM(R[0]C[-2]:R[0]C[-1])">3</span>
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
index 83e35d9..c6112e3 100644
--- a/sc/qa/filter/html/html.cxx
+++ b/sc/qa/filter/html/html.cxx
@@ -168,6 +168,30 @@ CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsFormula)
                         pDoc->GetFormula(/*col=*/2, /*row=*/0, /*tab=*/0));
    CPPUNIT_ASSERT_EQUAL(static_cast<double>(3), pDoc->GetValue(/*col=*/2, /*row=*/0, /*tab=*/0));
}

CPPUNIT_TEST_FIXTURE(Test, testPasteSingleCell)
{
    // Set up the operands of the pasted formula: A1 holds '1' and B1 holds '2'.
    createScDoc();
    ScDocument* pDocument = getScDoc();
    pDocument->SetValue(ScAddress(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0), 1.0);
    pDocument->SetValue(ScAddress(/*nColP=*/1, /*nRowP=*/0, /*nTabP=*/0), 2.0);

    // Paste the single-cell HTML fragment (a <span> carrying data-sheets-* attributes) at C1.
    ScAddress aPastePos(/*nColP=*/2, /*nRowP=*/0, /*nTabP=*/0);
    ScImportExport aImportExport(*pDocument, aPastePos);
    SvFileStream aStream(createFileURL(u"single-cell.html"), StreamMode::READ);
    const bool bImported
        = aImportExport.ImportStream(aStream, OUString(), SotClipboardFormatId::HTML);
    CPPUNIT_ASSERT(bImported);

    // C1 has to contain the formula itself, not just its result.
    // Without the accompanying fix in place, this test would have failed with:
    // - Expected: =SUM(A1:B1)
    // - Actual  :
    // i.e. data-sheets-* on <td> worked, but not on <span>.
    const OUString aFormula
        = pDocument->GetFormula(aPastePos.Col(), aPastePos.Row(), aPastePos.Tab());
    CPPUNIT_ASSERT_EQUAL(OUString("=SUM(A1:B1)"), aFormula);

    // ... and the formula has to evaluate to 1 + 2.
    const double fResult = pDocument->GetValue(aPastePos.Col(), aPastePos.Row(), aPastePos.Tab());
    CPPUNIT_ASSERT_EQUAL(static_cast<double>(3), fResult);
}
}

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index 5db879d..830dac6 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -992,6 +992,34 @@ IMPL_LINK( ScHTMLLayoutParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
    }
}

/**
 * Walks over the given HTML options and applies the data-sheets-* ones to the
 * entry that is currently being built (mxActEntry).
 *
 * Shared between the <td> and the <span> handlers. Options whose token is none
 * of DSVAL, DSNUM or DSFORMULA are left alone.
 *
 * @param rOptions options of the element that is currently being opened.
 */
void ScHTMLLayoutParser::HandleDataSheetsAttributes(const HTMLOptions& rOptions)
{
    for (const auto& rAttribute : rOptions)
    {
        const auto eToken = rAttribute.GetToken();

        if (eToken == HtmlOptionId::DSVAL)
        {
            // May fill in both the value string and the number format string.
            ParseDataSheetsValue(rAttribute.GetString(), mxActEntry->pValStr,
                                 mxActEntry->pNumStr);
        }
        else if (eToken == HtmlOptionId::DSNUM)
        {
            ParseDataSheetsNumberformat(rAttribute.GetString(), mxActEntry->pNumStr);
        }
        else if (eToken == HtmlOptionId::DSFORMULA)
        {
            // Stores the formula text together with the grammar it is written in.
            ParseDataSheetsFormula(rAttribute.GetString(), mxActEntry->moFormulaStr,
                                   mxActEntry->moFormulaGrammar);
        }
    }
}

void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
{
    if ( bInCell )
@@ -1072,26 +1100,12 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
                mxActEntry->pNumStr = rOption.GetString();
            }
            break;
            case HtmlOptionId::DSVAL:
            {
                ParseDataSheetsValue(rOption.GetString(), mxActEntry->pValStr, mxActEntry->pNumStr);
            }
            break;
            case HtmlOptionId::DSNUM:
            {
                ParseDataSheetsNumberformat(rOption.GetString(), mxActEntry->pNumStr);
            }
            break;
            case HtmlOptionId::DSFORMULA:
            {
                ParseDataSheetsFormula(rOption.GetString(), mxActEntry->moFormulaStr,
                                       mxActEntry->moFormulaGrammar);
            }
            break;
            default: break;
        }
    }

    HandleDataSheetsAttributes(rOptions);

    mxActEntry->nCol = nColCnt;
    mxActEntry->nRow = nRowCnt;
    mxActEntry->nTab = nTable;
@@ -1101,6 +1115,12 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
            SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY) );
}

/// Handles an opening <span>: forwards the options of the element to
/// HandleDataSheetsAttributes(), so data-sheets-* attributes work there as well.
void ScHTMLLayoutParser::SpanOn(HtmlImportInfo* pInfo)
{
    auto* pHtmlParser = static_cast<HTMLParser*>(pInfo->pParser);
    HandleDataSheetsAttributes(pHtmlParser->GetOptions());
}

void ScHTMLLayoutParser::TableRowOn( const HtmlImportInfo* pInfo )
{
    if ( nColCnt > nColCntStart )
@@ -1638,6 +1658,11 @@ void ScHTMLLayoutParser::ProcToken( HtmlImportInfo* pInfo )
            TableDataOn( pInfo );
        }
        break;
        case HtmlTokenId::SPAN_ON:
        {
            SpanOn(pInfo);
        }
        break;
        case HtmlTokenId::TABLEHEADER_OFF:
        case HtmlTokenId::TABLEDATA_OFF:        // Closes cell
        {
diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx
index 5b2d441..1ac9aa0 100644
--- a/sc/source/filter/inc/htmlpars.hxx
+++ b/sc/source/filter/inc/htmlpars.hxx
@@ -28,6 +28,7 @@
#include <utility>
#include <vector>
#include <o3tl/sorted_vector.hxx>
#include <svtools/parhtml.hxx>

#include <rangelst.hxx>
#include "eeparser.hxx"
@@ -212,6 +213,9 @@ private:
    void                Image( HtmlImportInfo* );
    void                AnchorOn( HtmlImportInfo* );
    void                FontOn( HtmlImportInfo* );
    /// Handles the HtmlTokenId::SPAN_ON token: applies the data-sheets-* attributes of the <span>.
    void SpanOn(HtmlImportInfo* pInfo);
    /// Handles the various data-sheets-* attributes on <td> and <span>: the value, number format
    /// and formula options found in rOptions are parsed into the current entry (mxActEntry).
    void HandleDataSheetsAttributes(const HTMLOptions& rOptions);

public:
                        ScHTMLLayoutParser( EditEngine*, OUString aBaseURL, const Size& aPageSize, ScDocument* );