From afa03ce7efb4fe5a0d71ad779df5a1a7d16abb44 Mon Sep 17 00:00:00 2001 From: zakons Date: Sun, 10 Dec 2017 23:45:34 -0500 Subject: [PATCH 1/5] Add cell_value method to PartialRowData --- bigtable/google/cloud/bigtable/row_data.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/bigtable/google/cloud/bigtable/row_data.py b/bigtable/google/cloud/bigtable/row_data.py index 9bde1c0cb5a3..b151746fb82b 100644 --- a/bigtable/google/cloud/bigtable/row_data.py +++ b/bigtable/google/cloud/bigtable/row_data.py @@ -171,6 +171,24 @@ def row_key(self): """ return self._row_key + def cell_value(self, column_family_id, column_id, index=0): + """Get a cell value + + :type column_family_id: str + :param column_family_id: The ID of the column family. Must be of the + form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. + :type column: bytes + :param column: The column within the column family where the cell + is located. + + :type index: int + :param index: The offset in the series of values, default = 0 + + :rtype: bytes or :class:`int` + :returns: the cell value + """ + return self._cells[column_family_id][column_id][index].value + class InvalidReadRowsResponse(RuntimeError): """Exception raised to to invalid response data from back-end.""" From 11891b44d2957f7a54c5cc49ad4c6b03aca7bb59 Mon Sep 17 00:00:00 2001 From: zakons Date: Mon, 11 Dec 2017 23:46:41 -0500 Subject: [PATCH 2/5] Add unit test for cell_value method on PartialRowData --- bigtable/google/cloud/bigtable/row_data.py | 4 ++-- bigtable/tests/unit/test_row_data.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/bigtable/google/cloud/bigtable/row_data.py b/bigtable/google/cloud/bigtable/row_data.py index b151746fb82b..75356f28968d 100644 --- a/bigtable/google/cloud/bigtable/row_data.py +++ b/bigtable/google/cloud/bigtable/row_data.py @@ -171,7 +171,7 @@ def row_key(self): """ return self._row_key - def cell_value(self, column_family_id, column_id, index=0): + def cell_value(self, column_family_id, column, index=0): """Get a cell value :type column_family_id: str @@ -187,7 +187,7 @@ def cell_value(self, column_family_id, column_id, index=0): :rtype: bytes or :class:`int` :returns: the cell value """ - return self._cells[column_family_id][column_id][index].value + return self._cells[column_family_id][column][index].value class InvalidReadRowsResponse(RuntimeError): diff --git a/bigtable/tests/unit/test_row_data.py b/bigtable/tests/unit/test_row_data.py index 7cfb1dc45d4e..8197be2568b6 100644 --- a/bigtable/tests/unit/test_row_data.py +++ b/bigtable/tests/unit/test_row_data.py @@ -174,6 +174,27 @@ def test_to_dict(self): } self.assertEqual(result, expected_result) + def test_cell_value(self): + from google.cloud.bigtable.row_data import Cell + + family_name = u'name1' + qual = b'col1' + + timestamp = object() + value = b'value-bytes' + cell = Cell(value, timestamp) + + partial_row_data = self._make_one(None) + partial_row_data._cells = { + family_name: { + qual: [cell] + } + } + + result = partial_row_data.cell_value(family_name, qual) + expected_result = value + self.assertEqual(result, expected_result) + def test_cells_property(self): partial_row_data = self._make_one(None) cells = {1: 2} From 35c0c35f4dd4ac1656e26c182ebd18fa03d4de99 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 13 Dec 2017 15:13:56 -0800 Subject: [PATCH 3/5] Re-factoring `cell_value()` to `get_cell()`. --- bigtable/google/cloud/bigtable/row_data.py | 73 +++++++++++---- bigtable/tests/unit/test_row_data.py | 100 +++++++++++++++++---- 2 files changed, 140 insertions(+), 33 deletions(-) diff --git a/bigtable/google/cloud/bigtable/row_data.py b/bigtable/google/cloud/bigtable/row_data.py index 75356f28968d..fe9204b91ce7 100644 --- a/bigtable/google/cloud/bigtable/row_data.py +++ b/bigtable/google/cloud/bigtable/row_data.py @@ -22,6 +22,16 @@ from google.cloud._helpers import _to_bytes +_MISSING_COLUMN_FAMILY = ( + 'Column family {} is not among the cells stored in this row.') +_MISSING_COLUMN = ( + 'Column {} is not among the cells stored in this row in the ' + 'column family {}.') +_MISSING_INDEX = ( + 'Index {!r} is not valid for the cells stored in this row for column {} ' + 'in the column family {}. There are {} such cells.') + + class Cell(object): """Representation of a Google Cloud Bigtable Cell. @@ -171,23 +181,54 @@ def row_key(self): """ return self._row_key - def cell_value(self, column_family_id, column, index=0): - """Get a cell value - - :type column_family_id: str - :param column_family_id: The ID of the column family. Must be of the - form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. - :type column: bytes - :param column: The column within the column family where the cell - is located. - - :type index: int - :param index: The offset in the series of values, default = 0 - - :rtype: bytes or :class:`int` - :returns: the cell value + def get_cell(self, column_family_id, column, index=0): + """Get a single cell stored on this instance. + + .. note:: + + This returns a copy of the actual ``Cell`` (so that the + caller cannot mutate internal state). + + Args: + column_family_id (str): The ID of the column family. Must be of the + form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. + column (bytes): The column within the column family where the cell + is located. + index (Optional[int]): The offset within the series of values. If + not specified, will return the first cell. + + Returns: + ~google.cloud.bigtable.row_data.Cell: The cell stored in the + specified column. + + Raises: + KeyError: If ``column_family_id`` is not among the cells stored + in this row. + KeyError: If ``column`` is not among the cells stored in this row + for the given ``column_family_id``. + IndexError: If ``index`` cannot be found within the cells stored + in this row for the given ``column_family_id``, ``column`` + pair. """ - return self._cells[column_family_id][column][index].value + try: + column_family = self._cells[column_family_id] + except KeyError: + raise KeyError(_MISSING_COLUMN_FAMILY.format(column_family_id)) + + try: + cells = column_family[column] + except KeyError: + raise KeyError(_MISSING_COLUMN.format(column, column_family_id)) + + try: + cell = cells[index] + except (TypeError, IndexError): + num_cells = len(cells) + msg = _MISSING_INDEX.format( + index, column, column_family_id, num_cells) + raise IndexError(msg) + + return copy.deepcopy(cell) class InvalidReadRowsResponse(RuntimeError): diff --git a/bigtable/tests/unit/test_row_data.py b/bigtable/tests/unit/test_row_data.py index 8197be2568b6..c8a20957cdbd 100644 --- a/bigtable/tests/unit/test_row_data.py +++ b/bigtable/tests/unit/test_row_data.py @@ -13,6 +13,10 @@ # limitations under the License. +import datetime +import json +import operator +import os import unittest import mock @@ -30,7 +34,6 @@ def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def _from_pb_test_helper(self, labels=None): - import datetime from google.cloud._helpers import _EPOCH from google.cloud.bigtable._generated import ( data_pb2 as data_v2_pb2) @@ -174,26 +177,89 @@ def test_to_dict(self): } self.assertEqual(result, expected_result) - def test_cell_value(self): - from google.cloud.bigtable.row_data import Cell + def test_get_cell_defaults(self): + family_name = u'name1' + qual = b'col1' + cell = _make_cell(b'') + + partial_row_data = self._make_one(None) + partial_row_data._cells = { + family_name: { + qual: [cell], + }, + } + result = partial_row_data.get_cell(family_name, qual) + # Make sure we get a copy, not the original. + self.assertIsNot(result, cell) + self.assertEqual(result, cell) + + def test_get_cell_explicit_index(self): family_name = u'name1' qual = b'col1' + cell1 = _make_cell(b'1') + cell2 = _make_cell(b'2') - timestamp = object() - value = b'value-bytes' - cell = Cell(value, timestamp) + partial_row_data = self._make_one(None) + partial_row_data._cells = { + family_name: { + qual: [cell1, cell2], + }, + } + + result = partial_row_data.get_cell(family_name, qual, index=1) + # Make sure we get a copy, not the original. + self.assertIsNot(result, cell2) + self.assertEqual(result, cell2) + + def test_get_cell_bad_family(self): + from google.cloud.bigtable import row_data + + family_name = u'name1' + partial_row_data = self._make_one(None) + self.assertEqual(partial_row_data._cells, {}) + + with self.assertRaises(KeyError) as exc_info: + partial_row_data.get_cell(family_name, None) + + expected_arg = row_data._MISSING_COLUMN_FAMILY.format(family_name) + self.assertEqual(exc_info.exception.args, (expected_arg,)) + + def test_get_cell_bad_column(self): + from google.cloud.bigtable import row_data + + family_name = u'name1' + qual = b'col1' + + partial_row_data = self._make_one(None) + partial_row_data._cells = {family_name: {}} + + with self.assertRaises(KeyError) as exc_info: + partial_row_data.get_cell(family_name, qual) + + expected_arg = row_data._MISSING_COLUMN.format(qual, family_name) + self.assertEqual(exc_info.exception.args, (expected_arg,)) + + def test_get_cell_bad_index(self): + from google.cloud.bigtable import row_data + + family_name = u'name1' + qual = b'col1' partial_row_data = self._make_one(None) partial_row_data._cells = { family_name: { - qual: [cell] - } + qual: [], + }, } - result = partial_row_data.cell_value(family_name, qual) - expected_result = value - self.assertEqual(result, expected_result) + for index in (5, 'not-int'): + with self.assertRaises(IndexError) as exc_info: + partial_row_data.get_cell(family_name, qual, index=index) + + expected_arg = row_data._MISSING_INDEX.format( + index, qual, family_name, 0) + self.assertEqual(exc_info.exception.args, (expected_arg,)) def test_cells_property(self): partial_row_data = self._make_one(None) @@ -454,8 +520,6 @@ def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def _load_json_test(self, test_name): - import os - if self.__class__._json_tests is None: dirname = os.path.dirname(__file__) filename = os.path.join(dirname, 'read-rows-acceptance-test.json') @@ -521,8 +585,6 @@ def test_invalid_commit_with_chunk(self): # JSON Error cases: incomplete final row def _sort_flattend_cells(self, flattened): - import operator - key_func = operator.itemgetter('rk', 'fm', 'qual') return sorted(flattened, key=key_func) @@ -738,8 +800,6 @@ def _parse_readrows_acceptance_tests(filename): test/resources/com/google/cloud/bigtable/grpc/scanner/v2/ read-rows-acceptance-test.json """ - import json - with open(filename) as json_file: test_json = json.load(json_file) @@ -748,3 +808,9 @@ def _parse_readrows_acceptance_tests(filename): chunks = _generate_cell_chunks(test['chunks']) results = test['results'] yield name, chunks, results + + +def _make_cell(value): + from google.cloud.bigtable import row_data + + return row_data.Cell(value, datetime.datetime.utcnow()) From fad0c51228f7c0b48f5c06adbaf6705c991787a1 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 13 Dec 2017 15:18:58 -0800 Subject: [PATCH 4/5] Adding `PartialRowData.get_cells()`. --- bigtable/google/cloud/bigtable/row_data.py | 70 ++++++++++++++++++---- bigtable/tests/unit/test_row_data.py | 48 +++++++++++++++ 2 files changed, 108 insertions(+), 10 deletions(-) diff --git a/bigtable/google/cloud/bigtable/row_data.py b/bigtable/google/cloud/bigtable/row_data.py index fe9204b91ce7..b86f3331ae3a 100644 --- a/bigtable/google/cloud/bigtable/row_data.py +++ b/bigtable/google/cloud/bigtable/row_data.py @@ -181,12 +181,43 @@ def row_key(self): """ return self._row_key + def _get_cells_no_copy(self, column_family_id, column): + """Get a time series of cells stored on this instance. + + Args: + column_family_id (str): The ID of the column family. Must be of the + form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. + column (bytes): The column within the column family where the cells + are located. + + Returns: + List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the + specified column. + + Raises: + KeyError: If ``column_family_id`` is not among the cells stored + in this row. + KeyError: If ``column`` is not among the cells stored in this row + for the given ``column_family_id``. + """ + try: + column_family = self._cells[column_family_id] + except KeyError: + raise KeyError(_MISSING_COLUMN_FAMILY.format(column_family_id)) + + try: + cells = column_family[column] + except KeyError: + raise KeyError(_MISSING_COLUMN.format(column, column_family_id)) + + return cells + def get_cell(self, column_family_id, column, index=0): """Get a single cell stored on this instance. .. note:: - This returns a copy of the actual ``Cell`` (so that the + This returns a copy of the actual cell (so that the caller cannot mutate internal state). Args: @@ -210,15 +241,7 @@ def get_cell(self, column_family_id, column, index=0): in this row for the given ``column_family_id``, ``column`` pair. """ - try: - column_family = self._cells[column_family_id] - except KeyError: - raise KeyError(_MISSING_COLUMN_FAMILY.format(column_family_id)) - - try: - cells = column_family[column] - except KeyError: - raise KeyError(_MISSING_COLUMN.format(column, column_family_id)) + cells = self._get_cells_no_copy(column_family_id, column) try: cell = cells[index] @@ -230,6 +253,33 @@ def get_cell(self, column_family_id, column, index=0): return copy.deepcopy(cell) + def get_cells(self, column_family_id, column): + """Get a time series of cells stored on this instance. + + .. note:: + + This returns a copy of the actual cells (so that the + caller cannot mutate internal state). + + Args: + column_family_id (str): The ID of the column family. Must be of the + form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. + column (bytes): The column within the column family where the cells + are located. + + Returns: + List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the + specified column. + + Raises: + KeyError: If ``column_family_id`` is not among the cells stored + in this row. + KeyError: If ``column`` is not among the cells stored in this row + for the given ``column_family_id``. + """ + cells = self._get_cells_no_copy(column_family_id, column) + return copy.deepcopy(cells) + class InvalidReadRowsResponse(RuntimeError): """Exception raised to to invalid response data from back-end.""" diff --git a/bigtable/tests/unit/test_row_data.py b/bigtable/tests/unit/test_row_data.py index c8a20957cdbd..770261cb5cb3 100644 --- a/bigtable/tests/unit/test_row_data.py +++ b/bigtable/tests/unit/test_row_data.py @@ -261,6 +261,54 @@ def test_get_cell_bad_index(self): index, qual, family_name, 0) self.assertEqual(exc_info.exception.args, (expected_arg,)) + def test_get_cells(self): + family_name = u'name1' + qual = b'col1' + cell = _make_cell(b'hi-mom') + + partial_row_data = self._make_one(None) + cells = [cell] + partial_row_data._cells = { + family_name: { + qual: cells, + }, + } + + result = partial_row_data.get_cells(family_name, qual) + # Make sure we get a copy, not the original. + self.assertIsNot(result, cells) + self.assertEqual(result, cells) + self.assertIsNot(result[0], cell) + self.assertEqual(result[0], cell) + + def test_get_cells_bad_family(self): + from google.cloud.bigtable import row_data + + family_name = u'name1' + partial_row_data = self._make_one(None) + self.assertEqual(partial_row_data._cells, {}) + + with self.assertRaises(KeyError) as exc_info: + partial_row_data.get_cells(family_name, None) + + expected_arg = row_data._MISSING_COLUMN_FAMILY.format(family_name) + self.assertEqual(exc_info.exception.args, (expected_arg,)) + + def test_get_cell_bad_column(self): + from google.cloud.bigtable import row_data + + family_name = u'name1' + qual = b'col1' + + partial_row_data = self._make_one(None) + partial_row_data._cells = {family_name: {}} + + with self.assertRaises(KeyError) as exc_info: + partial_row_data.get_cells(family_name, qual) + + expected_arg = row_data._MISSING_COLUMN.format(qual, family_name) + self.assertEqual(exc_info.exception.args, (expected_arg,)) + def test_cells_property(self): partial_row_data = self._make_one(None) cells = {1: 2} From ad1a87dd17558bce23e704bb5be1e33ecd9ebfe2 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 13 Dec 2017 15:47:31 -0800 Subject: [PATCH 5/5] Renaming test. --- bigtable/tests/unit/test_row_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigtable/tests/unit/test_row_data.py b/bigtable/tests/unit/test_row_data.py index 770261cb5cb3..c50988c855df 100644 --- a/bigtable/tests/unit/test_row_data.py +++ b/bigtable/tests/unit/test_row_data.py @@ -294,7 +294,7 @@ def test_get_cells_bad_family(self): expected_arg = row_data._MISSING_COLUMN_FAMILY.format(family_name) self.assertEqual(exc_info.exception.args, (expected_arg,)) - def test_get_cell_bad_column(self): + def test_get_cells_bad_column(self): from google.cloud.bigtable import row_data family_name = u'name1'