tdf#159483 sc HTML paste: handle data-sheets-value here, too
HTML import into Calc could already create text cells, but HTML paste
with the same content remained auto-converted to numbers
unconditionally.
Turns out HTML paste goes via ScHTMLLayoutParser instead of the HTML
import's ScHTMLQueryParser, so the data-sheets-value was ignored for
paste.
Fix the problem by extracting the old data-sheets-value handler from
ScHTMLQueryParser to a separate ParseDataSheetsValue(), and use it also
in ScHTMLLayoutParser.
As before, only the text data type is actually handled; other data types
are not supported yet.
Change-Id: I0b2bf4665af331d07624ed42e30a24e31bfca331
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163068
Reviewed-by: Miklos Vajna <[email protected]>
Tested-by: Jenkins
diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk
index b78349d..f3dec22 100644
--- a/sc/CppunitTest_sc_filter_html.mk
+++ b/sc/CppunitTest_sc_filter_html.mk
@@ -58,6 +58,7 @@ $(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \
$(eval $(call gb_CppunitTest_set_include,sc_filter_html,\
-I$(SRCDIR)/sc/source/ui/inc \
-I$(SRCDIR)/sc/inc \
-I$(SRCDIR)/sc/qa/unit \
$$(INCLUDE) \
))
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
index 76413c6..ba50361 100644
--- a/sc/qa/filter/html/html.cxx
+++ b/sc/qa/filter/html/html.cxx
@@ -16,16 +16,19 @@
#include <comphelper/propertyvalue.hxx>
#include <helper/qahelper.hxx>
#include <impex.hxx>
using namespace com::sun::star;
namespace
{
/// Covers sc/source/filter/html/ fixes.
class Test : public UnoApiXmlTest, public HtmlTestTools
class Test : public ScModelTestBase, public HtmlTestTools
{
public:
Test()
: UnoApiXmlTest("/sc/qa/filter/html/data/")
: ScModelTestBase("/sc/qa/filter/html/data/")
{
}
};
@@ -55,6 +58,31 @@ CPPUNIT_TEST_FIXTURE(Test, testTdAsText)
// i.e. data-sheets-value was ignored on import.
CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType);
}
CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsText)
{
    // Given an empty document:
    createScDoc();
    ScDocument* pDocument = getScDoc();

    // When pasting HTML with an A2 cell that contains "01" as text. The file is first
    // copied into a memory stream and rewound, then imported at A1 using the HTML
    // clipboard format:
    SvFileStream aSource(createFileURL(u"text.html"), StreamMode::READ);
    SvMemoryStream aBuffer;
    aBuffer.WriteStream(aSource);
    aBuffer.Seek(0);
    ScAddress aPastePos(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0);
    ScImportExport aImport(*pDocument, aPastePos);
    CPPUNIT_ASSERT(aImport.ImportStream(aBuffer, OUString(), SotClipboardFormatId::HTML));

    // Then make sure "01" is not auto-converted to 1, as a number:
    const ScAddress aCheckedPos(/*nColP=*/0, /*nRowP=*/1, /*nTabP=*/0);
    CellType eActualType = pDocument->GetCellType(aCheckedPos);
    // Without the accompanying fix in place, this test would have failed with:
    // - Expected: 2 (CELLTYPE_STRING)
    // - Actual  : 1 (CELLTYPE_VALUE)
    // i.e. data-sheets-value was ignored on paste.
    CPPUNIT_ASSERT_EQUAL(CELLTYPE_STRING, eActualType);
}
}
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index 5d46d12..1a7eff2 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -75,6 +75,31 @@
using ::editeng::SvxBorderLine;
using namespace ::com::sun::star;
namespace
{
/// Interprets a data-sheets-value attribute (written by Google Sheets), whose value is JSON.
///
/// @param rDataSheetsValue the raw attribute value, expected to be a JSON object.
/// @param rNumberFormat    set to NF_STANDARD_FORMAT_TEXT when the JSON's "1" key holds 2
///                         (text); left unchanged in every other case, including when the
///                         attribute is not well-formed.
void ParseDataSheetsValue(const OUString& rDataSheetsValue, sal_uInt32& rNumberFormat)
{
    OString aEncodedOption = rDataSheetsValue.toUtf8();
    const char* pEncodedOption = aEncodedOption.getStr();
    std::stringstream aStream(pEncodedOption);
    boost::property_tree::ptree aTree;
    try
    {
        boost::property_tree::read_json(aStream, aTree);
        // The "1" key describes the original data type.
        auto it = aTree.find("1");
        if (it == aTree.not_found())
            return;

        int nValueType = std::stoi(it->second.get_value<std::string>());
        // 2 is text.
        if (nValueType == 2)
        {
            rNumberFormat = NF_STANDARD_FORMAT_TEXT;
        }
    }
    catch (const std::exception&)
    {
        // The attribute comes from arbitrary HTML (file import or clipboard paste), so it
        // may be malformed JSON (read_json throws) or carry a non-numeric type
        // (std::stoi throws). Treat that like a missing attribute instead of letting the
        // exception escape into the HTML parser.
    }
}
}
ScHTMLStyles::ScHTMLStyles() : maEmpty() {}
void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
@@ -914,6 +939,7 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
bInCell = true;
bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
for (const auto & rOption : rOptions)
{
switch( rOption.GetToken() )
@@ -982,10 +1008,18 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
mxActEntry->pNumStr = rOption.GetString();
}
break;
case HtmlOptionId::DSVAL:
{
ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
}
break;
default: break;
}
}
if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
mxActEntry->aItemSet.Put(SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat));
mxActEntry->nCol = nColCnt;
mxActEntry->nRow = nRowCnt;
mxActEntry->nTab = nTable;
@@ -2129,23 +2163,7 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
break;
case HtmlOptionId::DSVAL:
{
// data-sheets-value from google sheets, value is a JSON.
OString aEncodedOption = rOption.GetString().toUtf8();
const char* pEncodedOption = aEncodedOption.getStr();
std::stringstream aStream(pEncodedOption);
boost::property_tree::ptree aTree;
boost::property_tree::read_json(aStream, aTree);
// The "1" key describes the original data type.
auto it = aTree.find("1");
if (it != aTree.not_found())
{
int nValueType = std::stoi(it->second.get_value<std::string>());
// 2 is text.
if (nValueType == 2)
{
nNumberFormat = NF_STANDARD_FORMAT_TEXT;
}
}
ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
}
break;
default: break;
diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx
index fcdf6b4..5b2d441 100644
--- a/sc/source/filter/inc/htmlpars.hxx
+++ b/sc/source/filter/inc/htmlpars.hxx
@@ -149,6 +149,7 @@ class HTMLOption;
typedef ::std::map<SCROW, SCROW> InnerMap;
typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;
/// HTML parser used during paste into Calc.
class ScHTMLLayoutParser : public ScHTMLParser
{
private:
@@ -575,6 +576,8 @@ public:
Builds the table structure correctly, ignores extended formatting like
pictures or column widths.
Used during file load / import into Calc.
*/
class ScHTMLQueryParser : public ScHTMLParser
{