tdf#159483 sc HTML paste: handle data-sheets-formula attribute

When a formula cell gets copied from google docs, the value of the <td>
element only contains the formula result. This means once value cells
and formula cells get copied together, the pasted formula cell won't be
updated anymore when the input values change.

Turns out there is a data-sheets-formula attribute on <td> that contains
the formula; it seems to use the R1C1 format.

Fix the problem by extending ScHTMLLayoutParser::TableDataOn() to parse
this attribute and set a formula on the target cell (rather than a
value) if the formula is available.

This required also extending ScEEImport a bit, since the HTML paste
builds on top of the RTF one in Calc.

Change-Id: I720df96ce74a5e865b7329d06f3b719551f31b96
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163234
Reviewed-by: Miklos Vajna <[email protected]>
Tested-by: Jenkins
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx
index 80e47b1..36e8539 100644
--- a/include/svtools/htmlkywd.hxx
+++ b/include/svtools/htmlkywd.hxx
@@ -448,6 +448,7 @@
#define OOO_STRING_SVTOOLS_HTML_O_DSval "data-sheets-value"
#define OOO_STRING_SVTOOLS_HTML_O_SDnum "sdnum"
#define OOO_STRING_SVTOOLS_HTML_O_DSnum "data-sheets-numberformat"
#define OOO_STRING_SVTOOLS_HTML_O_DSformula "data-sheets-formula"
#define OOO_STRING_SVTOOLS_HTML_O_sdlibrary "sdlibrary"
#define OOO_STRING_SVTOOLS_HTML_O_sdmodule "sdmodule"
#define OOO_STRING_SVTOOLS_HTML_O_sdevent "sdevent-"
diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h
index 21fcec8..89d8ee8 100644
--- a/include/svtools/htmltokn.h
+++ b/include/svtools/htmltokn.h
@@ -347,6 +347,7 @@ STRING_START        = BOOL_END,
    DSVAL,
    SDNUM, // StarDiv NumberFormat
    DSNUM,
    DSFORMULA,
    SDLIBRARY,
    SDMODULE,
STRING_END,
diff --git a/sc/qa/filter/html/data/formula.html b/sc/qa/filter/html/data/formula.html
new file mode 100644
index 0000000..f6c9245
--- /dev/null
+++ b/sc/qa/filter/html/data/formula.html
@@ -0,0 +1,7 @@
<table>
  <tr>
    <td data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:1}">1</td>
    <td data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:2}">2</td>
    <td data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:3}" data-sheets-formula="=SUM(R[0]C[-2]:R[0]C[-1])">3</td>
  </tr>
</table>
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
index b669763..83e35d9 100644
--- a/sc/qa/filter/html/html.cxx
+++ b/sc/qa/filter/html/html.cxx
@@ -143,6 +143,31 @@ CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsFormattedNumber)
    CPPUNIT_ASSERT_EQUAL(static_cast<double>(1000),
                         pDoc->GetValue(/*col=*/0, /*row=*/0, /*tab=*/0));
}

CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsFormula)
{
    // Start from a blank spreadsheet document.
    createScDoc();
    ScDocument* pDocument = getScDoc();

    // Read the HTML fixture (two value cells followed by a formula cell) into memory and
    // import it as HTML clipboard content at column 0, row 0 of the first sheet.
    SvFileStream aSource(createFileURL(u"formula.html"), StreamMode::READ);
    SvMemoryStream aBuffer;
    aBuffer.WriteStream(aSource);
    aBuffer.Seek(0);
    ScAddress aTopLeft(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0);
    ScImportExport aPaster(*pDocument, aTopLeft);
    const bool bImported = aPaster.ImportStream(aBuffer, OUString(), SotClipboardFormatId::HTML);
    CPPUNIT_ASSERT(bImported);

    // The third cell has to carry the sum formula itself, not just its result.
    // Without the accompanying fix in place, this test would have failed with:
    // - Expected: =SUM(A1:B1)
    // - Actual  :
    // i.e. the formula got lost on import and only the computed value arrived.
    const OUString aFormula = pDocument->GetFormula(/*col=*/2, /*row=*/0, /*tab=*/0);
    CPPUNIT_ASSERT_EQUAL(OUString("=SUM(A1:B1)"), aFormula);

    // The formula also has to evaluate to the sum of the two value cells.
    const double fResult = pDocument->GetValue(/*col=*/2, /*row=*/0, /*tab=*/0);
    CPPUNIT_ASSERT_EQUAL(3.0, fResult);
}
}

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index 78aaafd..5db879d 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -155,6 +155,14 @@ void ParseDataSheetsNumberformat(const OUString& rDataSheetsValue, std::optional
        }
    }
}

/// Handles the data-sheets-formula attribute coming from google sheets: the attribute text is
/// taken over unchanged as the formula string and tagged with the English R1C1 grammar.
void ParseDataSheetsFormula(const OUString& rDataSheetsFormula, std::optional<OUString>& rVal,
                            std::optional<formula::FormulaGrammar::Grammar>& rGrammar)
{
    rGrammar.emplace(formula::FormulaGrammar::GRAM_ENGLISH_XL_R1C1);
    rVal.emplace(rDataSheetsFormula);
}
}

ScHTMLStyles::ScHTMLStyles() : maEmpty() {}
@@ -1074,6 +1082,12 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
                ParseDataSheetsNumberformat(rOption.GetString(), mxActEntry->pNumStr);
            }
            break;
            case HtmlOptionId::DSFORMULA:
            {
                ParseDataSheetsFormula(rOption.GetString(), mxActEntry->moFormulaStr,
                                       mxActEntry->moFormulaGrammar);
            }
            break;
            default: break;
        }
    }
diff --git a/sc/source/filter/inc/eeparser.hxx b/sc/source/filter/inc/eeparser.hxx
index ebc383e..e890a80 100644
--- a/sc/source/filter/inc/eeparser.hxx
+++ b/sc/source/filter/inc/eeparser.hxx
@@ -56,6 +56,8 @@ struct ScEEParseEntry
    ESelection          aSel;           // Selection in EditEngine
    std::optional<OUString>
                        pValStr;        // HTML possibly SDVAL string
    std::optional<OUString> moFormulaStr;
    std::optional<formula::FormulaGrammar::Grammar> moFormulaGrammar;
    std::optional<OUString>
                        pNumStr;        // HTML possibly SDNUM string
    std::optional<OUString>
diff --git a/sc/source/filter/rtf/eeimpars.cxx b/sc/source/filter/rtf/eeimpars.cxx
index a2eca48..4dcaf50 100644
--- a/sc/source/filter/rtf/eeimpars.cxx
+++ b/sc/source/filter/rtf/eeimpars.cxx
@@ -340,6 +340,11 @@ void ScEEImport::WriteToDocument( bool bSizeColsRows, double nOutputFactor, SvNu

                if (!aValStr.isEmpty())
                    mpDoc->SetValue( nCol, nRow, nTab, fVal );
                else if (pE->moFormulaStr && pE->moFormulaGrammar)
                {
                    mpDoc->SetFormula(ScAddress(nCol, nRow, nTab), *pE->moFormulaStr,
                                      *pE->moFormulaGrammar);
                }
                else if ( !pE->aSel.HasRange() )
                {
                    // maybe ALT text of IMG or similar
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
index a3dff93..434e1cb 100644
--- a/svtools/source/svhtml/htmlkywd.cxx
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -527,6 +527,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = {
    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_DSval),     HtmlOptionId::DSVAL},
    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDnum),     HtmlOptionId::SDNUM}, // StarDiv NumberFormat
    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_DSnum),     HtmlOptionId::DSNUM},
    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_DSformula), HtmlOptionId::DSFORMULA},
    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdlibrary), HtmlOptionId::SDLIBRARY},
    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdmodule),  HtmlOptionId::SDMODULE},