Skip to content

Commit

Permalink
feat: support insertAll for range (#1909)
Browse files Browse the repository at this point in the history
* feat: support insertAll for range

* revert INTERVAL regex

* lint

* add unit test

* lint
  • Loading branch information
Linchin authored May 6, 2024
1 parent 0e39066 commit 74e75e8
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 4 deletions.
52 changes: 50 additions & 2 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
r"(?P<days>-?\d+) "
r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$"
)
# Shape of a RANGE string literal: "[", two parts separated by ", ", then ")".
# Shared by the RANGE helpers so the regex is compiled a single time.
# NOTE: the pattern carries no end anchor, so ``.match()`` only validates a
# prefix of the string; text after the first matching ")" is not rejected.
_RANGE_PATTERN = re.compile(r"\[.*, .*\)")

BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST"
"""Environment variable defining host for emulator."""
Expand Down Expand Up @@ -334,9 +335,8 @@ def _range_from_json(value, field):
The parsed range object from ``value`` if the ``field`` is not
null (otherwise it is :data:`None`).
"""
range_literal = re.compile(r"\[.*, .*\)")
if _not_null(value, field):
if range_literal.match(value):
if _RANGE_PATTERN.match(value):
start, end = value[1:-1].split(", ")
start = _range_element_from_json(start, field.range_element_type)
end = _range_element_from_json(end, field.range_element_type)
Expand Down Expand Up @@ -531,6 +531,52 @@ def _time_to_json(value):
return value


def _range_element_to_json(value, element_type=None):
"""Coerce 'value' to an JSON-compatible representation."""
if value is None:
return None
elif isinstance(value, str):
if value.upper() in ("UNBOUNDED", "NULL"):
return None
else:
# We do not enforce range element value to be valid to reduce
# redundancy with backend.
return value
elif (
element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS
):
converter = _SCALAR_VALUE_TO_JSON_ROW.get(element_type.element_type.upper())
return converter(value)
else:
raise ValueError(
f"Unsupported RANGE element type {element_type}, or "
"element type is empty. Must be DATE, DATETIME, or "
"TIMESTAMP"
)


def _range_field_to_json(range_element_type, value):
    """Coerce a RANGE 'value' to a JSON-compatible representation.

    Args:
        range_element_type:
            Element type of the RANGE field. It is forwarded unchanged to
            ``_range_element_to_json`` for each endpoint.
        value (Union[str, dict]):
            Either a string literal of the form ``"[start, end)"`` or a
            dictionary with optional ``"start"`` and ``"end"`` keys. A
            missing key is treated as :data:`None`.

    Returns:
        dict: A mapping with exactly the keys ``"start"`` and ``"end"``,
        each holding the converted endpoint.

    Raises:
        ValueError:
            If ``value`` is a string that is not a well-formed RANGE
            literal, or is neither a string nor a dictionary. Errors raised
            while converting an endpoint propagate unchanged.
    """
    if isinstance(value, str):
        # The entire string must be the literal. ``match`` would only anchor
        # at the start and let trailing text leak into the end endpoint once
        # the first and last characters are sliced off.
        if _RANGE_PATTERN.fullmatch(value) is None:
            raise ValueError(f"RANGE literal {value} has incorrect format")
        endpoints = value[1:-1].split(", ")
        # The pattern tolerates additional ", " separators; reject them
        # explicitly instead of failing on tuple unpacking.
        if len(endpoints) != 2:
            raise ValueError(f"RANGE literal {value} has incorrect format")
        start, end = endpoints
    elif isinstance(value, dict):
        start = value.get("start")
        end = value.get("end")
    else:
        raise ValueError(
            f"Unsupported type of RANGE value {value}, must be " "string or dict"
        )

    return {
        "start": _range_element_to_json(start, range_element_type),
        "end": _range_element_to_json(end, range_element_type),
    }


# Converters used for scalar values marshalled to the BigQuery API, such as in
# query parameters or the tabledata.insert API.
_SCALAR_VALUE_TO_JSON_ROW = {
Expand Down Expand Up @@ -676,6 +722,8 @@ def _single_field_to_json(field, row_value):

if field.field_type == "RECORD":
return _record_field_to_json(field.fields, row_value)
if field.field_type == "RANGE":
return _range_field_to_json(field.range_element_type, row_value)

return _scalar_field_to_json(field, row_value)

Expand Down
114 changes: 112 additions & 2 deletions tests/unit/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1049,10 +1049,22 @@ def test_w_datetime(self):
self.assertEqual(self._call_fut(when), "12:13:41")


def _make_field(
    field_type,
    mode="NULLABLE",
    name="testing",
    fields=(),
    range_element_type=None,
):
    """Build a ``SchemaField`` for the tests in this module.

    Args:
        field_type: Value passed as the field's ``field_type``.
        mode: Value passed as the field's ``mode``.
        name: Value passed as the field's ``name``.
        fields: Value passed as the field's ``fields``.
        range_element_type:
            Value passed as the field's ``range_element_type``; defaults to
            :data:`None` so existing callers are unaffected.

    Returns:
        The constructed ``SchemaField``.
    """
    # Imported lazily, matching the other helpers in this test module.
    from google.cloud.bigquery.schema import SchemaField

    return SchemaField(
        name=name,
        field_type=field_type,
        mode=mode,
        fields=fields,
        range_element_type=range_element_type,
    )


class Test_scalar_field_to_json(unittest.TestCase):
Expand Down Expand Up @@ -1251,6 +1263,98 @@ def test_w_dict_unknown_fields(self):
)


class Test_range_field_to_json(unittest.TestCase):
    """Unit tests for ``_helpers._range_field_to_json``."""

    def _call_fut(self, range_element_type, value):
        """Invoke the function under test with the given element type."""
        from google.cloud.bigquery._helpers import _range_field_to_json

        return _range_field_to_json(range_element_type, value)

    def test_w_date(self):
        field = _make_field("RANGE", range_element_type="DATE")
        start = datetime.date(2016, 12, 3)
        original = {"start": start}
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": "2016-12-03", "end": None}
        self.assertEqual(converted, expected)

    def test_w_date_string(self):
        field = _make_field("RANGE", range_element_type="DATE")
        original = {"start": "2016-12-03"}
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": "2016-12-03", "end": None}
        self.assertEqual(converted, expected)

    def test_w_datetime(self):
        field = _make_field("RANGE", range_element_type="DATETIME")
        start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456)
        original = {"start": start}
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": "2016-12-03T14:11:27.123456", "end": None}
        self.assertEqual(converted, expected)

    def test_w_datetime_string(self):
        field = _make_field("RANGE", range_element_type="DATETIME")
        original = {"start": "2016-12-03T14:11:27.123456"}
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": "2016-12-03T14:11:27.123456", "end": None}
        self.assertEqual(converted, expected)

    def test_w_timestamp(self):
        from google.cloud._helpers import UTC

        field = _make_field("RANGE", range_element_type="TIMESTAMP")
        start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC)
        original = {"start": start}
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None}
        self.assertEqual(converted, expected)

    def test_w_timestamp_string(self):
        field = _make_field("RANGE", range_element_type="TIMESTAMP")
        original = {"start": "2016-12-03T14:11:27.123456Z"}
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None}
        self.assertEqual(converted, expected)

    def test_w_timestamp_float(self):
        field = _make_field("RANGE", range_element_type="TIMESTAMP")
        original = {"start": 12.34567}
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": 12.34567, "end": None}
        self.assertEqual(converted, expected)

    def test_w_string_literal(self):
        field = _make_field("RANGE", range_element_type="DATE")
        original = "[2016-12-03, UNBOUNDED)"
        converted = self._call_fut(field.range_element_type, original)
        expected = {"start": "2016-12-03", "end": None}
        self.assertEqual(converted, expected)

    def test_w_unsupported_range_element_type(self):
        field = _make_field("RANGE", range_element_type="TIME")
        with self.assertRaises(ValueError):
            self._call_fut(
                field.range_element_type,
                {"start": datetime.time(12, 13, 41)},
            )

    def test_w_no_range_element_type(self):
        field = _make_field("RANGE")
        # Use a non-string endpoint inside a dict so the error comes from the
        # missing element type. A bare "2016-12-03" string would instead be
        # rejected earlier as a malformed RANGE literal.
        with self.assertRaises(ValueError):
            self._call_fut(
                field.range_element_type,
                {"start": datetime.date(2016, 12, 3)},
            )

    def test_w_incorrect_literal_format(self):
        field = _make_field("RANGE", range_element_type="DATE")
        original = "[2016-12-03, UNBOUNDED]"
        with self.assertRaises(ValueError):
            self._call_fut(field.range_element_type, original)

    def test_w_unsupported_representation(self):
        field = _make_field("RANGE", range_element_type="DATE")
        with self.assertRaises(ValueError):
            self._call_fut(field.range_element_type, object())


class Test_field_to_json(unittest.TestCase):
def _call_fut(self, field, value):
from google.cloud.bigquery._helpers import _field_to_json
Expand Down Expand Up @@ -1285,6 +1389,12 @@ def test_w_scalar(self):
converted = self._call_fut(field, original)
self.assertEqual(converted, str(original))

def test_w_range(self):
    """A RANGE value given as a dict of strings is returned unchanged."""
    range_field = _make_field("RANGE", range_element_type="DATE")
    payload = {"start": "2016-12-03", "end": "2024-12-03"}
    self.assertEqual(self._call_fut(range_field, payload), payload)


class Test_snake_to_camel_case(unittest.TestCase):
def _call_fut(self, value):
Expand Down

0 comments on commit 74e75e8

Please sign in to comment.