tdf#159483 sc HTML import: handle data-sheets-value attribute for the bool case

When copying cells with boolean TRUE and FALSE from google docs to Calc,
the paste result is a TRUE and a FALSE string instead of boolean values.

The problem is that boolean is meant to be a float 0 or 1 with custom
cell format, but we don't build this doc model.

Fix the problem by changing ParseDataSheetsValue() to write the properties
of the cell, similar to what the normal HTML import would extract from our
own markup, like:

	<td height="17" align="right" sdval="1" sdnum="1033;0;BOOLEAN">TRUE</td>

This requires passing around both the value and the numbering
properties, since the cell format just decides it's a boolean, but the
cell value will decide if it's TRUE or FALSE.

Change-Id: Id558ced56e02bbe24330d82c3998b047dc8febdb
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163108
Reviewed-by: Miklos Vajna <[email protected]>
Tested-by: Jenkins
diff --git a/sc/qa/filter/html/data/bool.html b/sc/qa/filter/html/data/bool.html
new file mode 100644
index 0000000..8fe2799
--- /dev/null
+++ b/sc/qa/filter/html/data/bool.html
@@ -0,0 +1,8 @@
<table>
  <tr>
    <td data-sheets-value="{&quot;1&quot;:4,&quot;4&quot;:1}">WAHR</td>
  </tr>
  <tr>
    <td data-sheets-value="{&quot;1&quot;:4,&quot;4&quot;:0}">FALSCH</td>
  </tr>
</table>
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
index ba50361..6ab2cc7 100644
--- a/sc/qa/filter/html/html.cxx
+++ b/sc/qa/filter/html/html.cxx
@@ -15,6 +15,8 @@
#include <com/sun/star/table/XCellRange.hpp>

#include <comphelper/propertyvalue.hxx>
#include <svl/numformat.hxx>
#include <svl/zformat.hxx>

#include <helper/qahelper.hxx>
#include <impex.hxx>
@@ -83,6 +85,37 @@ CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsText)
    // i.e. data-sheets-value was ignored on paste.
    CPPUNIT_ASSERT_EQUAL(CELLTYPE_STRING, eCellType);
}

CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsBools)
{
    // Given an empty document:
    createScDoc();

    // When pasting HTML (bool.html) whose cells carry a boolean data-sheets-value attribute:
    ScDocument* pDoc = getScDoc();
    ScAddress aCellPos(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0);
    ScImportExport aImporter(*pDoc, aCellPos);
    SvFileStream aFile(createFileURL(u"bool.html"), StreamMode::READ);
    SvMemoryStream aMemory;
    aMemory.WriteStream(aFile);
    aMemory.Seek(0);
    CPPUNIT_ASSERT(aImporter.ImportStream(aMemory, OUString(), SotClipboardFormatId::HTML));

    // Then make sure A1 has the BOOLEAN number format and its value is 1 (true):
    sal_uInt32 nNumberFormat = pDoc->GetNumberFormat(/*col=*/0, /*row=*/0, /*tab=*/0);
    const SvNumberformat* pNumberFormat = pDoc->GetFormatTable()->GetEntry(nNumberFormat);
    // Without the accompanying fix in place, this test would have failed with:
    // - Expected: BOOLEAN
    // - Actual  : General
    // i.e. data-sheets-value's bool case was ignored.
    CPPUNIT_ASSERT_EQUAL(OUString("BOOLEAN"), pNumberFormat->GetFormatstring());
    CPPUNIT_ASSERT_EQUAL(static_cast<double>(1), pDoc->GetValue(/*col=*/0, /*row=*/0, /*tab=*/0));
    // And make sure A2 has the BOOLEAN number format and its value is 0 (false):
    nNumberFormat = pDoc->GetNumberFormat(/*col=*/0, /*row=*/1, /*tab=*/0);
    pNumberFormat = pDoc->GetFormatTable()->GetEntry(nNumberFormat);
    CPPUNIT_ASSERT_EQUAL(OUString("BOOLEAN"), pNumberFormat->GetFormatstring());
    CPPUNIT_ASSERT_EQUAL(static_cast<double>(0), pDoc->GetValue(/*col=*/0, /*row=*/1, /*tab=*/0));
}
}

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index 1a7eff2..12872dc 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -78,7 +78,7 @@ using namespace ::com::sun::star;
namespace
{
/// data-sheets-value from google sheets, value is a JSON.
void ParseDataSheetsValue(const OUString& rDataSheetsValue, sal_uInt32& rNumberFormat)
void ParseDataSheetsValue(const OUString& rDataSheetsValue, std::optional<OUString>& rVal, std::optional<OUString>& rNum)
{
    // data-sheets-value from google sheets, value is a JSON.
    OString aEncodedOption = rDataSheetsValue.toUtf8();
@@ -91,10 +91,27 @@ void ParseDataSheetsValue(const OUString& rDataSheetsValue, sal_uInt32& rNumberF
    if (it != aTree.not_found())
    {
        int nValueType = std::stoi(it->second.get_value<std::string>());
        // 2 is text.
        if (nValueType == 2)
        switch (nValueType)
        {
            rNumberFormat = NF_STANDARD_FORMAT_TEXT;
            case 2:
            {
                // 2 is text.
                // See SfxHTMLParser::GetTableDataOptionsValNum(): we leave the parse language and
                // the number language unspecified.
                rNum = ";;@";
                break;
            }
            case 4:
            {
                // 4 is boolean.
                it = aTree.find("4");
                if (it != aTree.not_found())
                {
                    rVal = OUString::fromUtf8(it->second.get_value<std::string>());
                }
                rNum = ";;BOOLEAN";
                break;
            }
        }
    }
}
@@ -939,7 +956,6 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
    bInCell = true;
    bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
    const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
    sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
    for (const auto & rOption : rOptions)
    {
        switch( rOption.GetToken() )
@@ -1010,16 +1026,13 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
            break;
            case HtmlOptionId::DSVAL:
            {
                ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
                ParseDataSheetsValue(rOption.GetString(), mxActEntry->pValStr, mxActEntry->pNumStr);
            }
            break;
            default: break;
        }
    }

    if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
        mxActEntry->aItemSet.Put(SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat));

    mxActEntry->nCol = nColCnt;
    mxActEntry->nRow = nRowCnt;
    mxActEntry->nTab = nTable;
@@ -2163,7 +2176,7 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
                break;
                case HtmlOptionId::DSVAL:
                {
                    ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
                    ParseDataSheetsValue(rOption.GetString(), pValStr, pNumStr);
                }
                break;
                default: break;