tdf#159483 sc HTML paste: handle data-sheets-value here, too

HTML import into Calc could already create text cells, but HTML paste
with the same content remained auto-converted to numbers
unconditionally.

Turns out HTML paste goes via ScHTMLLayoutParser instead of the HTML
import's ScHTMLQueryParser, so the data-sheets-value was ignored for
paste.

Fix the problem by extracting the old data-sheets-value handler from
ScHTMLQueryParser to a separate ParseDataSheetsValue(), and use it also
in ScHTMLLayoutParser.

As before, only the text data type is handled; other data types are
not supported yet.

Change-Id: I0b2bf4665af331d07624ed42e30a24e31bfca331
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163068
Reviewed-by: Miklos Vajna <[email protected]>
Tested-by: Jenkins
diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk
index b78349d..f3dec22 100644
--- a/sc/CppunitTest_sc_filter_html.mk
+++ b/sc/CppunitTest_sc_filter_html.mk
@@ -58,6 +58,7 @@ $(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \
$(eval $(call gb_CppunitTest_set_include,sc_filter_html,\
    -I$(SRCDIR)/sc/source/ui/inc \
    -I$(SRCDIR)/sc/inc \
    -I$(SRCDIR)/sc/qa/unit \
    $$(INCLUDE) \
))

diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
index 76413c6..ba50361 100644
--- a/sc/qa/filter/html/html.cxx
+++ b/sc/qa/filter/html/html.cxx
@@ -16,16 +16,19 @@

#include <comphelper/propertyvalue.hxx>

#include <helper/qahelper.hxx>
#include <impex.hxx>

using namespace com::sun::star;

namespace
{
/// Covers sc/source/filter/html/ fixes.
class Test : public UnoApiXmlTest, public HtmlTestTools
class Test : public ScModelTestBase, public HtmlTestTools
{
public:
    Test()
        : UnoApiXmlTest("/sc/qa/filter/html/data/")
        : ScModelTestBase("/sc/qa/filter/html/data/")
    {
    }
};
@@ -55,6 +58,31 @@ CPPUNIT_TEST_FIXTURE(Test, testTdAsText)
    // i.e. data-sheets-value was ignored on import.
    CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType);
}

CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsText)
{
    // Given an empty document:
    createScDoc();
    ScDocument* pDoc = getScDoc();

    // When pasting HTML with an A2 cell that contains "01" as text; the file content is copied
    // into a memory stream first, and that stream is handed to the importer as HTML clipboard
    // data, with A1 as the paste position:
    SvMemoryStream aMemory;
    SvFileStream aSource(createFileURL(u"text.html"), StreamMode::READ);
    aMemory.WriteStream(aSource);
    aMemory.Seek(0);
    const ScAddress aPastePos(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0);
    ScImportExport aImporter(*pDoc, aPastePos);
    CPPUNIT_ASSERT(aImporter.ImportStream(aMemory, OUString(), SotClipboardFormatId::HTML));

    // Then make sure "01" is not auto-converted to 1, as a number:
    const ScAddress aTextCellPos(/*nColP=*/0, /*nRowP=*/1, /*nTabP=*/0);
    // Without the accompanying fix in place, this test would have failed with:
    // - Expected: 2 (CELLTYPE_STRING)
    // - Actual  : 1 (CELLTYPE_VALUE)
    // i.e. data-sheets-value was ignored on paste.
    CPPUNIT_ASSERT_EQUAL(CELLTYPE_STRING, pDoc->GetCellType(aTextCellPos));
}
}

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index 5d46d12..1a7eff2 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -75,6 +75,31 @@
using ::editeng::SvxBorderLine;
using namespace ::com::sun::star;

namespace
{
/// Evaluates a data-sheets-value attribute from google sheets, the value is a JSON.
///
/// @param rDataSheetsValue the raw attribute value.
/// @param rNumberFormat set to NF_STANDARD_FORMAT_TEXT when the JSON declares the original data
/// type to be text; left unchanged otherwise, including when the value can't be parsed.
void ParseDataSheetsValue(const OUString& rDataSheetsValue, sal_uInt32& rNumberFormat)
{
    OString aEncodedOption = rDataSheetsValue.toUtf8();
    const char* pEncodedOption = aEncodedOption.getStr();
    std::stringstream aStream(pEncodedOption);
    try
    {
        boost::property_tree::ptree aTree;
        boost::property_tree::read_json(aStream, aTree);
        // The "1" key describes the original data type.
        auto it = aTree.find("1");
        if (it == aTree.not_found())
            return;

        int nValueType = std::stoi(it->second.get_value<std::string>());
        // 2 is text.
        if (nValueType == 2)
        {
            rNumberFormat = NF_STANDARD_FORMAT_TEXT;
        }
    }
    catch (const std::exception&)
    {
        // The attribute comes from HTML content, so it may not be valid JSON, or its "1" entry
        // may not be a number: read_json() and std::stoi() throw in that case. Handle such a
        // value like a missing one instead of letting the exception escape from the parser.
    }
}
}

ScHTMLStyles::ScHTMLStyles() : maEmpty() {}

void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
@@ -914,6 +939,7 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
    bInCell = true;
    bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
    const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
    sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
    for (const auto & rOption : rOptions)
    {
        switch( rOption.GetToken() )
@@ -982,10 +1008,18 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
                mxActEntry->pNumStr = rOption.GetString();
            }
            break;
            case HtmlOptionId::DSVAL:
            {
                ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
            }
            break;
            default: break;
        }
    }

    if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
        mxActEntry->aItemSet.Put(SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat));

    mxActEntry->nCol = nColCnt;
    mxActEntry->nRow = nRowCnt;
    mxActEntry->nTab = nTable;
@@ -2129,23 +2163,7 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
                break;
                case HtmlOptionId::DSVAL:
                {
                    // data-sheets-value from google sheets, value is a JSON.
                    OString aEncodedOption = rOption.GetString().toUtf8();
                    const char* pEncodedOption = aEncodedOption.getStr();
                    std::stringstream aStream(pEncodedOption);
                    boost::property_tree::ptree aTree;
                    boost::property_tree::read_json(aStream, aTree);
                    // The "1" key describes the original data type.
                    auto it = aTree.find("1");
                    if (it != aTree.not_found())
                    {
                        int nValueType = std::stoi(it->second.get_value<std::string>());
                        // 2 is text.
                        if (nValueType == 2)
                        {
                            nNumberFormat = NF_STANDARD_FORMAT_TEXT;
                        }
                    }
                    ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
                }
                break;
                default: break;
diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx
index fcdf6b4..5b2d441 100644
--- a/sc/source/filter/inc/htmlpars.hxx
+++ b/sc/source/filter/inc/htmlpars.hxx
@@ -149,6 +149,7 @@ class HTMLOption;
typedef ::std::map<SCROW, SCROW> InnerMap;
typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;

/// HTML parser used during paste into Calc.
class ScHTMLLayoutParser : public ScHTMLParser
{
private:
@@ -575,6 +576,8 @@ public:

    Builds the table structure correctly, ignores extended formatting like
    pictures or column widths.

    Used during file load / import into Calc.
 */
class ScHTMLQueryParser : public ScHTMLParser
{