|
15 | 15 | :class:`SceneManager <scenedetect.scene_manager.SceneManager>`. |
16 | 16 | """ |
17 | 17 |
|
18 | | -import math |
19 | | -import queue |
20 | | -import threading |
21 | 18 | import typing as ty |
22 | | -from dataclasses import dataclass |
23 | 19 | from logging import getLogger |
24 | | -from pathlib import Path |
25 | | -from string import Template |
26 | 20 |
|
27 | 21 | # OpenCV is a required package, but we don't have it as an explicit dependency since we |
28 | 22 | # need to support both opencv-python and opencv-python-headless. Include some additional |
@@ -182,299 +176,3 @@ def detect( |
182 | 176 | if scene_manager.stats_manager is not None: |
183 | 177 | scene_manager.stats_manager.save_to_csv(csv_file=stats_file_path) |
184 | 178 | return scene_manager.get_scene_list(start_in_scene=start_in_scene) |
185 | | - |
186 | | - |
187 | | -# TODO: Just merge these variables into the extractor. |
@dataclass
class ImageExtractorConfig:
    # Plain container for the image extraction settings. Field names, order, and
    # defaults mirror the keyword arguments accepted by `ImageExtractor.__init__`.

    #: Number of images to generate for each scene. Minimum is 1.
    num_images: int = 3

    #: Number of frames to pad each scene around the beginning and end (e.g. moves
    #: the first/last image into the scene by N frames). Can be set to 0, but that
    #: will result in some video files failing to extract the very last frame.
    frame_margin: int = 1

    #: Type of image to save (must be one of 'jpg', 'png', or 'webp').
    image_extension: str = "jpg"

    #: Quality/compression efficiency, based on type of image:
    #:   'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless
    #:                   for webp.
    #:   'png': Compression from 1-9, where 9 achieves best filesize but is slower
    #:          to encode.
    encoder_param: int = 95

    #: Template to use for naming image files. Can use the template variables
    #: $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, $TIMECODE, $FRAME_NUMBER,
    #: $TIMESTAMP_MS. Should not include an extension.
    image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER"

    #: Optional factor by which to rescale saved images. A scaling factor of 1 would
    #: not result in rescaling. A value < 1 results in a smaller saved image, while
    #: a value > 1 results in an image larger than the original. This value is
    #: ignored if either the height or width values are specified.
    scale: ty.Optional[float] = None

    #: Optional value for the height of the saved images. Specifying both the height
    #: and width will resize images to an exact size, regardless of aspect ratio.
    #: Specifying only height will rescale the image to that number of pixels in
    #: height while preserving the aspect ratio.
    height: ty.Optional[int] = None

    #: Optional value for the width of the saved images. Specifying both the width
    #: and height will resize images to an exact size, regardless of aspect ratio.
    #: Specifying only width will rescale the image to that number of pixels wide
    #: while preserving the aspect ratio.
    width: ty.Optional[int] = None

    #: Type of interpolation to use when resizing images.
    interpolation: Interpolation = Interpolation.CUBIC
224 | | - |
225 | | - |
class ImageExtractor:
    """Saves a configurable number of still images for each scene in a scene list.

    Frames are read on the calling thread and handed through bounded queues to an
    encoder thread and then to a file-writer thread. This object is *not* thread-safe.
    """

    def __init__(
        self,
        num_images: int = 3,
        frame_margin: int = 1,
        image_extension: str = "jpg",
        encoder_param: int = 95,
        image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER",
        scale: ty.Optional[float] = None,
        height: ty.Optional[int] = None,
        width: ty.Optional[int] = None,
        interpolation: Interpolation = Interpolation.CUBIC,
    ):
        """Helper type to handle saving images for a set of scenes. This object is *not* thread-safe.

        Arguments:
            num_images: Number of images to generate for each scene. Minimum is 1.
            frame_margin: Number of frames to pad each scene around the beginning
                and end (e.g. moves the first/last image into the scene by N frames).
                Can set to 0, but will result in some video files failing to extract
                the very last frame.
            image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
            encoder_param: Quality/compression efficiency, based on type of image:
                'jpg' / 'webp':  Quality 0-100, higher is better quality. 100 is lossless for webp.
                'png': Compression from 1-9, where 9 achieves best filesize but is slower to encode.
            image_name_template: Template to use for output filenames. Can use template variables
                $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, $TIMECODE, $FRAME_NUMBER, $TIMESTAMP_MS.
                *NOTE*: Should not include the image extension (set `image_extension` instead).
            scale: Optional factor by which to rescale saved images. A scaling factor of 1 would
                not result in rescaling. A value < 1 results in a smaller saved image, while a
                value > 1 results in an image larger than the original. This value is ignored if
                either the height or width values are specified.
            height: Optional value for the height of the saved images. Specifying both the height
                and width will resize images to an exact size, regardless of aspect ratio.
                Specifying only height will rescale the image to that number of pixels in height
                while preserving the aspect ratio.
            width: Optional value for the width of the saved images. Specifying both the width
                and height will resize images to an exact size, regardless of aspect ratio.
                Specifying only width will rescale the image to that number of pixels wide
                while preserving the aspect ratio.
            interpolation: Type of interpolation to use when resizing images.
        """
        self._num_images = num_images
        self._frame_margin = frame_margin
        self._image_extension = image_extension
        self._encoder_param = encoder_param
        self._image_name_template = image_name_template
        self._scale = scale
        self._height = height
        self._width = width
        self._interpolation = interpolation

    def run(
        self,
        video: VideoStream,
        scene_list: SceneList,
        output_dir: ty.Optional[str] = None,
        show_progress=False,
    ) -> ty.Dict[int, ty.List[str]]:
        """Extract and save images for every scene in `scene_list`.

        Arguments:
            video: Video to read frames from. It is reset before extraction starts and
                seeked once per image.
            scene_list: Scenes, as (start, end) pairs, to generate images for.
            output_dir: Passed to `get_and_create_path` together with each file name to
                build the destination path (presumably the output directory - confirm
                against that helper).
            show_progress: If True, display a `tqdm` progress bar counting saved images.

        Returns:
            Dict mapping the 0-based scene index to the list of file names (expanded
            template plus extension) queued for that scene. Empty if `scene_list` is
            empty.

        Raises:
            ValueError: `num_images` is less than 1 or `frame_margin` is negative.
        """
        if not scene_list:
            return {}
        if self._num_images <= 0 or self._frame_margin < 0:
            raise ValueError("num_images must be at least 1 and frame_margin must not be negative")

        video.reset()

        # Setup flags and init progress bar if available.
        completed = True
        logger.info(
            f"Saving {self._num_images} images per scene [format={self._image_extension}] {output_dir if output_dir else ''} "
        )
        progress_bar = None
        if show_progress:
            progress_bar = tqdm(
                total=len(scene_list) * self._num_images, unit="images", dynamic_ncols=True
            )

        filename_template = Template(self._image_name_template)
        # Zero-pad widths are derived from the decimal digit count. This avoids the
        # floating point error of floor(log10(n)) at exact powers of ten.
        scene_num_format = "%%0%dd" % max(3, len(str(len(scene_list))))
        image_num_format = "%%0%dd" % (len(str(self._num_images)) + 1)

        timecode_list = self.generate_timecode_list(scene_list)
        image_filenames = {i: [] for i in range(len(timecode_list))}
        logger.debug("Writing images with template %s", filename_template.template)

        # Bounded queues keep at most a few decoded/encoded frames in memory at a time.
        MAX_QUEUED_ENCODE_FRAMES = 4
        MAX_QUEUED_SAVE_IMAGES = 4
        encode_queue = queue.Queue(MAX_QUEUED_ENCODE_FRAMES)
        save_queue = queue.Queue(MAX_QUEUED_SAVE_IMAGES)
        encode_thread = threading.Thread(
            target=self._image_encode_thread,
            args=(video, encode_queue, save_queue, self._image_extension),
            daemon=True,
        )
        save_thread = threading.Thread(
            target=self._save_files_thread,
            args=(save_queue, progress_bar),
            daemon=True,
        )
        encode_thread.start()
        save_thread.start()

        for i, scene_timecodes in enumerate(timecode_list):
            for j, image_timecode in enumerate(scene_timecodes):
                video.seek(image_timecode)
                frame_im = video.read()
                if frame_im is None or frame_im is False:
                    # Could not read this frame: give up on the rest of this scene.
                    completed = False
                    break
                # TODO: Add extension to template.
                # TODO: Allow NUM to be a valid suffix in addition to NUMBER.
                file_path = "%s.%s" % (
                    filename_template.safe_substitute(
                        VIDEO_NAME=video.name,
                        SCENE_NUMBER=scene_num_format % (i + 1),
                        IMAGE_NUMBER=image_num_format % (j + 1),
                        FRAME_NUMBER=image_timecode.get_frames(),
                        TIMESTAMP_MS=int(image_timecode.get_seconds() * 1000),
                        TIMECODE=image_timecode.get_timecode().replace(":", ";"),
                    ),
                    self._image_extension,
                )
                image_filenames[i].append(file_path)
                encode_queue.put((frame_im, get_and_create_path(file_path, output_dir)))

        # *WARNING*: Exceptions raised inside the worker threads are still not handled,
        # and a dead worker can cause a deadlock on a full queue.
        #
        # Shut the pipeline down in order: the encoder must finish (and have pushed all of
        # its output) before the save thread is told to stop. Otherwise the save thread may
        # see its sentinel ahead of pending images, exit early, and leave the encoder
        # blocked on a full save queue.
        encode_queue.put((None, None))
        encode_thread.join()
        save_queue.put((None, None))
        save_thread.join()
        if progress_bar is not None:
            progress_bar.close()
        if not completed:
            logger.error("Could not generate all output images.")

        return image_filenames

    def _image_encode_thread(
        self,
        video: VideoStream,
        encode_queue: queue.Queue,
        save_queue: queue.Queue,
        image_extension: str,
    ):
        """Worker: resize and encode frames from `encode_queue`, pushing results to `save_queue`.

        Runs until a `(None, None)` sentinel is received. Frames that fail to encode are
        skipped.
        """
        aspect_ratio = video.aspect_ratio
        # Treat near-square pixels as square so the correction resize can be skipped.
        if abs(aspect_ratio - 1.0) < 0.01:
            aspect_ratio = None
        # TODO: Validate that encoder_param is within the proper range.
        # Should be between 0 and 100 (inclusive) for jpg/webp, and 1-9 for png.
        imwrite_param = (
            [get_cv2_imwrite_params()[self._image_extension], self._encoder_param]
            if self._encoder_param is not None
            else []
        )
        while True:
            frame_im, dest_path = encode_queue.get()
            if frame_im is None:
                return
            frame_im = self.resize_image(
                frame_im,
                aspect_ratio,
            )
            (is_ok, encoded) = cv2.imencode(f".{image_extension}", frame_im, imwrite_param)
            if not is_ok:
                continue
            save_queue.put((encoded, dest_path))

    def _save_files_thread(self, save_queue: queue.Queue, progress_bar: tqdm):
        """Worker: write encoded images from `save_queue` to disk.

        Runs until a `(None, None)` sentinel is received. Advances `progress_bar` (if
        any) once per item taken from the queue.
        """
        while True:
            encoded, dest_path = save_queue.get()
            if encoded is None:
                return
            if encoded is not False:
                encoded.tofile(Path(dest_path))
            if progress_bar is not None:
                progress_bar.update(1)

    def generate_timecode_list(self, scene_list: SceneList) -> ty.List[ty.Iterable[FrameTimecode]]:
        """Generates a list of timecodes for each scene in `scene_list` based on the current config
        parameters.

        Each scene's frame range is split into `num_images` roughly equal chunks. The first
        image is taken `frame_margin` frames after the start of the first chunk, the last
        image `frame_margin` frames before the end of the last chunk, and every other image
        from the middle of its chunk. Scenes shorter than `num_images` frames are padded
        by repeating their last frame.

        Returns:
            One list of `FrameTimecode` per scene, in scene order. Empty if `scene_list`
            is empty.
        """
        if not scene_list:
            return []
        framerate = scene_list[0][0].framerate
        num_images = self._num_images
        margin = self._frame_margin

        timecode_list = []
        for start, end in scene_list:
            first_frame = start.get_frames()
            # Guard against zero length scenes: always consider at least one frame.
            num_frames = max(1, end.get_frames() - first_frame)
            frames = range(first_frame, first_frame + num_frames)
            if len(frames) < num_images:
                # Pad short scenes to the number of images by repeating the last frame.
                frames = list(frames) + [frames[-1]] * (num_images - len(frames))

            scene_timecodes = []
            for j, chunk in enumerate(np.array_split(frames, num_images)):
                if (0 < j < num_images - 1) or num_images == 1:
                    # Middle images (or the only image): centre of the chunk.
                    frame = chunk[len(chunk) // 2]
                elif j == 0:
                    # First image: moved into the scene, but never past the chunk end.
                    frame = min(chunk[0] + margin, chunk[-1])
                else:
                    # Last image: moved back into the scene, but never before the chunk start.
                    frame = max(chunk[-1] - margin, chunk[0])
                scene_timecodes.append(FrameTimecode(int(frame), fps=framerate))
            timecode_list.append(scene_timecodes)
        return timecode_list

    def resize_image(
        self,
        image: cv2.Mat,
        aspect_ratio: ty.Optional[float],
    ) -> cv2.Mat:
        """Resizes the given `image` according to the current config parameters. `aspect_ratio` is
        used to correct for non-square pixels.

        Arguments:
            image: Image to resize.
            aspect_ratio: Horizontal scale factor applied first to correct for non-square
                pixels, or None to skip the correction.

        Returns:
            The resized image, or the (aspect-corrected) input if no height, width, or
            scale is configured.

        Raises:
            ValueError: The configured height or width is not positive.
        """
        # TODO: Combine this resize with the ones below.
        if aspect_ratio is not None:
            image = cv2.resize(
                image, (0, 0), fx=aspect_ratio, fy=1.0, interpolation=self._interpolation.value
            )
        image_height = image.shape[0]
        image_width = image.shape[1]

        # Start from the configured size; a missing dimension is derived below.
        height = self._height
        width = self._width
        # Figure out what kind of resizing needs to be done
        if height or width:
            if height and not width:
                factor = height / float(image_height)
                # Keep at least one pixel so extreme ratios do not produce an empty image.
                width = max(1, int(factor * image_width))
            elif width and not height:
                factor = width / float(image_width)
                height = max(1, int(factor * image_height))
            if height <= 0 or width <= 0:
                raise ValueError("height and width must be positive when specified")
            image = cv2.resize(image, (width, height), interpolation=self._interpolation.value)
        elif self._scale:
            image = cv2.resize(
                image,
                (0, 0),
                fx=self._scale,
                fy=self._scale,
                interpolation=self._interpolation.value,
            )
        return image
0 commit comments