Skip to content

Commit cec376a

Browse files
authored
Merge pull request googleapis#2545 from dhermes/revamp-iterator-2
Moving backend specific behavior from Page to Iterator.
2 parents 22ca9d4 + 387a35e commit cec376a

9 files changed

Lines changed: 392 additions & 280 deletions

File tree

core/google/cloud/iterator.py

Lines changed: 129 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,19 @@
1717
These iterators simplify the process of paging through API responses
1818
where the response is a list of results with a ``nextPageToken``.
1919
20-
To make an iterator work, just override the ``PAGE_CLASS`` class
21-
attribute so that given a response (containing a page of results) can
22-
be parsed into an iterable page of the actual objects you want::
20+
To make an iterator work, you may need to override the
21+
``ITEMS_KEY`` class attribute so that a given response (containing a page of
22+
results) can be parsed into an iterable page of the actual objects you want::
2323
24-
class MyPage(Page):
24+
class MyIterator(Iterator):
25+
26+
ITEMS_KEY = 'blocks'
2527
2628
def _item_to_value(self, item):
2729
my_item = MyItemClass(other_arg=True)
2830
my_item._set_properties(item)
2931
return my_item
3032
31-
32-
class MyIterator(Iterator):
33-
34-
PAGE_CLASS = MyPage
35-
3633
You can then use this to get **all** the results from a resource::
3734
3835
>>> iterator = MyIterator(...)
@@ -69,12 +66,55 @@ class MyIterator(Iterator):
6966
2
7067
>>> iterator.page.remaining
7168
19
69+
70+
It's also possible to consume an entire page and handle the paging process
71+
manually::
72+
73+
>>> iterator = MyIterator(...)
74+
>>> # Manually pull down the first page.
75+
>>> iterator.update_page()
76+
>>> items = list(iterator.page)
77+
>>> items
78+
[
79+
<MyItemClass at 0x7fd64a098ad0>,
80+
<MyItemClass at 0x7fd64a098ed0>,
81+
<MyItemClass at 0x7fd64a098e90>,
82+
]
83+
>>> iterator.page.remaining
84+
0
85+
>>> iterator.page.num_items
86+
3
87+
>>> iterator.next_page_token
88+
'eav1OzQB0OM8rLdGXOEsyQWSG'
89+
>>>
90+
>>> # Ask for the next page to be grabbed.
91+
>>> iterator.update_page()
92+
>>> list(iterator.page)
93+
[
94+
<MyItemClass at 0x7fea740abdd0>,
95+
<MyItemClass at 0x7fea740abe50>,
96+
]
97+
>>>
98+
>>> # When there are no more results
99+
>>> iterator.update_page()
100+
>>> iterator.page is None
101+
True
72102
"""
73103

74104

75105
import six
76106

77107

108+
_UNSET = object()
109+
_NO_MORE_PAGES_ERR = 'Iterator has no more pages.'
110+
_UNSTARTED_ERR = (
111+
'Iterator has not been started. Either begin iterating, '
112+
'call next(my_iter) or call my_iter.update_page().')
113+
_PAGE_ERR_TEMPLATE = (
114+
'Tried to update the page while current page (%r) still has %d '
115+
'items remaining.')
116+
117+
78118
class Page(object):
79119
"""Single page of results in an iterator.
80120
@@ -83,23 +123,26 @@ class Page(object):
83123
84124
:type response: dict
85125
:param response: The JSON API response for a page.
86-
"""
87126
88-
ITEMS_KEY = 'items'
127+
:type items_key: str
128+
:param items_key: The dictionary key used to retrieve items
129+
from the response.
130+
"""
89131

90-
def __init__(self, parent, response):
132+
def __init__(self, parent, response, items_key):
91133
self._parent = parent
92-
items = response.get(self.ITEMS_KEY, ())
134+
items = response.get(items_key, ())
93135
self._num_items = len(items)
94136
self._remaining = self._num_items
95137
self._item_iter = iter(items)
138+
self.response = response
96139

97140
@property
98141
def num_items(self):
99142
"""Total items in the page.
100143
101144
:rtype: int
102-
:returns: The number of items in this page of items.
145+
:returns: The number of items in this page.
103146
"""
104147
return self._num_items
105148

@@ -108,31 +151,18 @@ def remaining(self):
108151
"""Remaining items in the page.
109152
110153
:rtype: int
111-
:returns: The number of items remaining this page.
154+
:returns: The number of items remaining in this page.
112155
"""
113156
return self._remaining
114157

115158
def __iter__(self):
116159
"""The :class:`Page` is an iterator."""
117160
return self
118161

119-
def _item_to_value(self, item):
120-
"""Get the next item in the page.
121-
122-
This method (along with the constructor) is the workhorse
123-
of this class. Subclasses will need to implement this method.
124-
125-
:type item: dict
126-
:param item: An item to be converted to a native object.
127-
128-
:raises NotImplementedError: Always
129-
"""
130-
raise NotImplementedError
131-
132162
def next(self):
133-
"""Get the next value in the iterator."""
163+
"""Get the next value in the page."""
134164
item = six.next(self._item_iter)
135-
result = self._item_to_value(item)
165+
result = self._parent._item_to_value(item)
136166
# Since we've successfully got the next value from the
137167
# iterator, we update the number of remaining.
138168
self._remaining -= 1
@@ -145,9 +175,10 @@ def next(self):
145175
class Iterator(object):
146176
"""A generic class for iterating through Cloud JSON APIs list responses.
147177
148-
Sub-classes need to over-write ``PAGE_CLASS``.
178+
Sub-classes may need to override :attr:`ITEMS_KEY` and must define
179+
:meth:`_item_to_value`.
149180
150-
:type client: :class:`google.cloud.client.Client`
181+
:type client: :class:`~google.cloud.client.Client`
151182
:param client: The client, which owns a connection to make requests.
152183
153184
:type page_token: str
@@ -156,18 +187,22 @@ class Iterator(object):
156187
:type max_results: int
157188
:param max_results: (Optional) The maximum number of results to fetch.
158189
159-
:type extra_params: dict or None
160-
:param extra_params: Extra query string parameters for the API call.
190+
:type extra_params: dict
191+
:param extra_params: (Optional) Extra query string parameters for the
192+
API call.
161193
162194
:type path: str
163-
:param path: The path to query for the list of items.
195+
:param path: (Optional) The path to query for the list of items. Defaults
196+
to :attr:`PATH` on the current iterator class.
164197
"""
165198

166199
PAGE_TOKEN = 'pageToken'
167200
MAX_RESULTS = 'maxResults'
168201
RESERVED_PARAMS = frozenset([PAGE_TOKEN, MAX_RESULTS])
169-
PAGE_CLASS = Page
170202
PATH = None
203+
ITEMS_KEY = 'items'
204+
"""The dictionary key used to retrieve items from each response."""
205+
_PAGE_CLASS = Page
171206

172207
def __init__(self, client, page_token=None, max_results=None,
173208
extra_params=None, path=None):
@@ -180,7 +215,7 @@ def __init__(self, client, page_token=None, max_results=None,
180215
self.page_number = 0
181216
self.next_page_token = page_token
182217
self.num_results = 0
183-
self._page = None
218+
self._page = _UNSET
184219

185220
def _verify_params(self):
186221
"""Verifies the parameters don't use any reserved parameter.
@@ -197,46 +232,86 @@ def _verify_params(self):
197232
def page(self):
198233
"""The current page of results that has been retrieved.
199234
235+
If there are no more results, will return :data:`None`.
236+
200237
:rtype: :class:`Page`
201238
:returns: The page of items that has been retrieved.
239+
:raises AttributeError: If the page has not been set.
202240
"""
241+
if self._page is _UNSET:
242+
raise AttributeError(_UNSTARTED_ERR)
203243
return self._page
204244

205245
def __iter__(self):
206246
"""The :class:`Iterator` is an iterator."""
207247
return self
208248

209-
def _update_page(self):
210-
"""Replace the current page.
249+
def update_page(self, require_empty=True):
250+
"""Move to the next page in the result set.
211251
212-
Does nothing if the current page is non-null and has items
213-
remaining.
252+
If the current page is not empty and ``require_empty`` is :data:`True`
253+
then an exception will be raised. If the current page is not empty
254+
and ``require_empty`` is :data:`False`, then this will return
255+
without updating the current page.
214256
215-
:raises: :class:`~exceptions.StopIteration` if there is no next page.
257+
If the current page **is** empty, but there are no more results,
258+
sets the current page to :data:`None`.
259+
260+
If the current page is already :data:`None` (the iterator was exhausted by an earlier call), raises an exception.
261+
262+
:type require_empty: bool
263+
:param require_empty: (Optional) Flag to indicate if the current page
264+
must be empty before updating.
265+
266+
:raises ValueError: If ``require_empty`` is :data:`True` but the
267+
current page is not empty.
268+
:raises ValueError: If the current page is already :data:`None`, meaning there are no more pages.
216269
"""
217-
if self.page is not None and self.page.remaining > 0:
218-
return
219-
if self.has_next_page():
220-
response = self._get_next_page_response()
221-
self._page = self.PAGE_CLASS(self, response)
270+
if self._page is None:
271+
raise ValueError(_NO_MORE_PAGES_ERR)
272+
273+
# NOTE: This assumes Page.remaining can never go below 0.
274+
page_empty = self._page is _UNSET or self._page.remaining == 0
275+
if page_empty:
276+
if self._has_next_page():
277+
response = self._get_next_page_response()
278+
self._page = self._PAGE_CLASS(self, response, self.ITEMS_KEY)
279+
else:
280+
self._page = None
222281
else:
223-
raise StopIteration
282+
if require_empty:
283+
msg = _PAGE_ERR_TEMPLATE % (self._page, self.page.remaining)
284+
raise ValueError(msg)
285+
286+
def _item_to_value(self, item):
287+
"""Convert a raw item from a page into a native object.
288+
289+
Subclasses will need to implement this method.
290+
291+
:type item: dict
292+
:param item: An item to be converted to a native object.
293+
294+
:raises NotImplementedError: Always
295+
"""
296+
raise NotImplementedError
224297

225298
def next(self):
226-
"""Get the next value in the iterator."""
227-
self._update_page()
299+
"""Get the next item from the request."""
300+
self.update_page(require_empty=False)
301+
if self.page is None:
302+
raise StopIteration
228303
item = six.next(self.page)
229304
self.num_results += 1
230305
return item
231306

232307
# Alias needed for Python 2/3 support.
233308
__next__ = next
234309

235-
def has_next_page(self):
236-
"""Determines whether or not this iterator has more pages.
310+
def _has_next_page(self):
311+
"""Determines whether or not there are more pages with results.
237312
238313
:rtype: bool
239-
:returns: Whether the iterator has more pages or not.
314+
:returns: Whether the iterator has more pages.
240315
"""
241316
if self.page_number == 0:
242317
return True
@@ -281,4 +356,4 @@ def reset(self):
281356
self.page_number = 0
282357
self.next_page_token = None
283358
self.num_results = 0
284-
self._page = None
359+
self._page = _UNSET

0 commit comments

Comments
 (0)