tdf#159483 sc HTML export: handle data-sheets-value attribute for the text case

Pasting the text "'01" as HTML from Google Sheets into Calc already
worked, but copying it as HTML from Calc still turned 01 into the number 1.

What's required is to emit a general marker in the HTML output and then
the correct data-sheets-value attribute on the relevant <td> element.

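Add a way to inject a marker in ScHTMLExport::WriteBody(), so that
data-sheets-* attributes are considered at all: the marker is taken from
the SC_DEBUG_HTML_MARKER environment variable if it is set, and is
otherwise written only when LibreOfficeKit is active. Also extend
ScHTMLExport::WriteCell() to write the data-sheets-value attribute, for
the text case as a start.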

Other types like booleans are not yet handled at export time.

Change-Id: Ib66e92c84235797cb4731e73d0a5b6286b6f3ab3
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163328
Reviewed-by: Miklos Vajna <[email protected]>
Tested-by: Jenkins
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
index c6112e3..e0d7977 100644
--- a/sc/qa/filter/html/html.cxx
+++ b/sc/qa/filter/html/html.cxx
@@ -192,6 +192,28 @@ CPPUNIT_TEST_FIXTURE(Test, testPasteSingleCell)
                         pDoc->GetFormula(/*col=*/2, /*row=*/0, /*tab=*/0));
    CPPUNIT_ASSERT_EQUAL(static_cast<double>(3), pDoc->GetValue(/*col=*/2, /*row=*/0, /*tab=*/0));
}

CPPUNIT_TEST_FIXTURE(Test, testCopyText)
{
    // Set up: a fresh Calc document whose A1 holds the text 01 (entered as '01).
    createScDoc();
    ScDocument* pCalcDoc = getScDoc();
    ScAddress aA1(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0);
    pCalcDoc->SetString(aA1, "'01");

    // Exercise: export just A1 in the HTML clipboard format into a memory stream.
    ScImportExport aCopier(*pCalcDoc, aA1);
    SvMemoryStream aHtmlStream;
    CPPUNIT_ASSERT(aCopier.ExportStream(aHtmlStream, OUString(), SotClipboardFormatId::HTML));

    // Verify: the exported <td> carries a data-sheets-value of type 2 with the value "01".
    // Before the export wrote this attribute, the assert below failed with:
    // - XPath '//td' no attribute 'data-sheets-value' exist
    // i.e. the markup had no metadata to stop 01 from being converted to 1 (number).
    aHtmlStream.Seek(0);
    htmlDocUniquePtr pParsed = parseHtmlStream(&aHtmlStream);
    assertXPath(pParsed, "//td"_ostr, "data-sheets-value"_ostr, "{ \"1\": 2, \"2\": \"01\"}");
}
}

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/html/htmlexp.cxx b/sc/source/filter/html/htmlexp.cxx
index 2a3cb6a..a2f14f6 100644
--- a/sc/source/filter/html/htmlexp.cxx
+++ b/sc/source/filter/html/htmlexp.cxx
@@ -87,6 +87,7 @@
#include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
#include <rtl/strbuf.hxx>
#include <officecfg/Office/Common.hxx>
#include <tools/json_writer.hxx>

using ::editeng::SvxBorderLine;
using namespace ::com::sun::star;
@@ -671,6 +672,20 @@ void ScHTMLExport::WriteBody()
        }

        rStrm.WriteChar( '>' ); OUT_LF();

        // A marker right after <body> can be used, so that data-sheets-* attributes are considered
        // at all. This is disabled by default.
        OString aMarker;
        char* pEnv = getenv("SC_DEBUG_HTML_MARKER");
        if (pEnv)
        {
            aMarker = pEnv;
        }
        else if (comphelper::LibreOfficeKit::isActive())
        {
            aMarker = "<google-sheets-html-origin/>"_ostr;
        }
        rStrm.WriteOString(aMarker);
    }

    if ( bAll )
@@ -1128,6 +1143,17 @@ void ScHTMLExport::WriteCell( sc::ColumnBlockPosition& rBlockPos, SCCOL nCol, SC
    aStrTD.append(HTMLOutFuncs::CreateTableDataOptionsValNum(bValueData, fVal,
        nFormat, *pFormatter, &aNonConvertibleChars));

    if (!bValueData)
    {
        // 2 is text.
        tools::JsonWriter aJson;
        aJson.put("1", static_cast<sal_Int32>(2));
        aJson.put("2", pDoc->GetString(aPos));
        OUString aJsonString = OUString::fromUtf8(aJson.finishAndGetAsOString());
        aStrTD.append(" " OOO_STRING_SVTOOLS_HTML_O_DSval "=\""
                      + HTMLOutFuncs::ConvertStringToHTML(aJsonString) + "\"");
    }

    TAG_ON(aStrTD.makeStringAndClear());

    //write the note for this as the first thing in the tag