1616use OC \Preview \Db \Preview ;
1717use OC \Preview \Db \PreviewMapper ;
1818use OCP \DB \Exception ;
19+ use OCP \DB \QueryBuilder \IQueryBuilder ;
1920use OCP \Files \IMimeTypeDetector ;
2021use OCP \Files \IMimeTypeLoader ;
2122use OCP \Files \IRootFolder ;
3031use RecursiveIteratorIterator ;
3132
3233class LocalPreviewStorage implements IPreviewStorage {
34+ private const SCAN_BATCH_SIZE = 1000 ;
35+
3336 public function __construct (
3437 private readonly IConfig $ config ,
3538 private readonly PreviewMapper $ previewMapper ,
@@ -117,88 +120,242 @@ public function scan(): int {
117120 if (!file_exists ($ this ->getPreviewRootFolder ())) {
118121 return 0 ;
119122 }
123+
120124 $ scanner = new RecursiveDirectoryIterator ($ this ->getPreviewRootFolder ());
121125 $ previewsFound = 0 ;
122- $ skipFiles = [];
126+
127+ /**
128+ * Use an associative array keyed by path for O(1) lookup instead of
129+ * the O(n) in_array() the original code used.
130+ *
131+ * @var array<string, true> $skipPaths
132+ */
133+ $ skipPaths = [];
134+
135+ /**
136+ * Pending previews grouped by fileId. A single original file can have
137+ * many preview variants (different sizes/formats), so we group them to
138+ * issue one filecache lookup per original file rather than one per
139+ * preview variant.
140+ *
141+ * @var array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
142+ */
143+ $ pendingByFileId = [];
144+
145+ /**
146+ * path_hash => realPath for legacy filecache entries that need to be
147+ * cleaned up. Only populated when $checkForFileCache is true.
148+ *
149+ * @var array<string, string> $pendingPathHashes
150+ */
151+ $ pendingPathHashes = [];
152+ $ pendingCount = 0 ;
153+
123154 foreach (new RecursiveIteratorIterator ($ scanner ) as $ file ) {
124- if ($ file ->isFile () && !in_array ((string )$ file , $ skipFiles , true )) {
125- $ preview = Preview::fromPath ((string )$ file , $ this ->mimeTypeDetector );
126- if ($ preview === false ) {
127- $ this ->logger ->error ('Unable to parse preview information for ' . $ file ->getRealPath ());
128- continue ;
129- }
155+ if (!$ file ->isFile ()) {
156+ continue ;
157+ }
158+
159+ $ filePath = (string )$ file ;
160+ if (isset ($ skipPaths [$ filePath ])) {
161+ continue ;
162+ }
163+
164+ $ preview = Preview::fromPath ($ filePath , $ this ->mimeTypeDetector );
165+ if ($ preview === false ) {
166+ $ this ->logger ->error ('Unable to parse preview information for ' . $ file ->getRealPath ());
167+ continue ;
168+ }
169+
170+ $ preview ->setSize ($ file ->getSize ());
171+ $ preview ->setMtime ($ file ->getMtime ());
172+ $ preview ->setEncrypted (false );
173+
174+ $ realPath = $ file ->getRealPath ();
175+ $ pendingByFileId [$ preview ->getFileId ()][] = [
176+ 'preview ' => $ preview ,
177+ 'filePath ' => $ filePath ,
178+ 'realPath ' => $ realPath ,
179+ ];
180+ $ pendingCount ++;
181+
182+ if ($ checkForFileCache ) {
183+ $ relativePath = str_replace ($ this ->getRootFolder () . '/ ' , '' , $ realPath );
184+ $ pendingPathHashes [md5 ($ relativePath )] = $ realPath ;
185+ }
186+
187+ if ($ pendingCount >= self ::SCAN_BATCH_SIZE ) {
188+ $ this ->connection ->beginTransaction ();
130189 try {
131- $ preview ->setSize ($ file ->getSize ());
132- $ preview ->setMtime ($ file ->getMtime ());
133- $ preview ->setEncrypted (false );
134-
135- $ qb = $ this ->connection ->getQueryBuilder ();
136- $ result = $ qb ->select ('storage ' , 'etag ' , 'mimetype ' )
137- ->from ('filecache ' )
138- ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ preview ->getFileId ())))
139- ->setMaxResults (1 )
140- ->runAcrossAllShards () // Unavoidable because we can't extract the storage_id from the preview name
141- ->executeQuery ()
142- ->fetchAssociative ();
143-
144- if ($ result === false ) {
145- // original file is deleted
146- $ this ->logger ->warning ('Original file ' . $ preview ->getFileId () . ' was not found. Deleting preview at ' . $ file ->getRealPath ());
147- @unlink ($ file ->getRealPath ());
148- continue ;
149- }
190+ $ previewsFound += $ this ->processScanBatch ($ pendingByFileId , $ pendingPathHashes , $ checkForFileCache , $ skipPaths );
191+ $ this ->connection ->commit ();
192+ } catch (\Exception $ e ) {
193+ $ this ->connection ->rollBack ();
194+ $ this ->logger ->error ($ e ->getMessage (), ['exception ' => $ e ]);
195+ throw $ e ;
196+ }
197+ $ pendingByFileId = [];
198+ $ pendingPathHashes = [];
199+ $ pendingCount = 0 ;
200+ }
201+ }
202+
203+ if ($ pendingCount > 0 ) {
204+ $ this ->connection ->beginTransaction ();
205+ try {
206+ $ previewsFound += $ this ->processScanBatch ($ pendingByFileId , $ pendingPathHashes , $ checkForFileCache , $ skipPaths );
207+ $ this ->connection ->commit ();
208+ } catch (\Exception $ e ) {
209+ $ this ->connection ->rollBack ();
210+ $ this ->logger ->error ($ e ->getMessage (), ['exception ' => $ e ]);
211+ throw $ e ;
212+ }
213+ }
214+
215+ return $ previewsFound ;
216+ }
217+
/**
 * Process one batch of preview files collected during scan().
 *
 * For every original file id in the batch the matching filecache row is
 * looked up in bulk. Previews whose original file no longer has a filecache
 * row are unlinked from disk. All other previews are completed with the
 * storage id, etag and source mimetype of the original file, inserted into
 * the preview table (an already existing row is tolerated) and, when they
 * are not yet stored in the nested directory layout, moved there.
 *
 * @param array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
 * @param array<string, string> $pendingPathHashes path_hash => realPath
 * @param bool $checkForFileCache Whether legacy filecache rows pointing at the preview files should be removed.
 * @param array<string, true> $skipPaths Modified in place: newly-moved paths are added so the outer iterator skips them.
 * @return int Number of previews that were registered (inserted or already present in the database).
 * @throws Exception When inserting a preview fails for a reason other than a unique constraint violation.
 * @throws LogicException When a preview file cannot be moved to its new location.
 */
private function processScanBatch(
    array $pendingByFileId,
    array $pendingPathHashes,
    bool $checkForFileCache,
    array &$skipPaths,
): int {
    $filecacheByFileId = $this->fetchFilecacheByFileIds(array_keys($pendingByFileId));
    $legacyByPathHash = [];
    if ($checkForFileCache && $pendingPathHashes !== []) {
        $legacyByPathHash = $this->fetchFilecacheByPathHashes(array_keys($pendingPathHashes));
    }

    // Loop invariants: resolve the folder prefixes once instead of once per preview.
    // The root folder is only needed (and therefore only resolved) for the legacy cleanup.
    $rootPrefix = $checkForFileCache ? $this->getRootFolder() . '/' : '';
    $previewRoot = $this->getPreviewRootFolder();

    $previewsFound = 0;
    foreach ($pendingByFileId as $fileId => $items) {
        if (!isset($filecacheByFileId[$fileId])) {
            // Original file has been deleted - clean up all its previews.
            foreach ($items as $item) {
                $this->logger->warning('Original file ' . $fileId . ' was not found. Deleting preview at ' . $item['realPath']);
                // Best effort: a preview that cannot be removed must not abort the scan.
                @unlink($item['realPath']);
            }
            continue;
        }

        // These values are identical for every preview variant of the same original file.
        $filecacheRow = $filecacheByFileId[$fileId];
        $storageId = (int)$filecacheRow['storage'];
        $etag = $filecacheRow['etag'];
        $sourceMimetype = $this->mimeTypeLoader->getMimetypeById((int)$filecacheRow['mimetype']);

        foreach ($items as $item) {
            /** @var Preview $preview */
            $preview = $item['preview'];

            if ($checkForFileCache) {
                // Legacy previews were registered in the filecache themselves; drop that row
                // and any parent folder rows that became empty.
                $pathHash = md5(str_replace($rootPrefix, '', $item['realPath']));
                if (isset($legacyByPathHash[$pathHash])) {
                    $legacyRow = $legacyByPathHash[$pathHash];
                    $qb = $this->connection->getQueryBuilder();
                    $qb->delete('filecache')
                        ->where($qb->expr()->eq('fileid', $qb->createNamedParameter($legacyRow['fileid'])))
                        ->andWhere($qb->expr()->eq('storage', $qb->createNamedParameter($legacyRow['storage'])))
                        ->executeStatement();
                    $this->deleteParentsFromFileCache((int)$legacyRow['parent'], (int)$legacyRow['storage']);
                }
            }

            $preview->setStorageId($storageId);
            $preview->setEtag($etag);
            $preview->setSourceMimetype($sourceMimetype);
            $preview->generateId();

            // The insert runs in its own transaction so that a unique constraint violation
            // (preview already registered) can be rolled back and ignored.
            // NOTE(review): scan() already opens a transaction around this method, so this
            // one is nested - confirm the connection handles an inner rollBack() (savepoints)
            // without poisoning the outer commit.
            $this->connection->beginTransaction();
            try {
                $this->previewMapper->insert($preview);
                $this->connection->commit();
            } catch (Exception $e) {
                $this->connection->rollBack();
                if ($e->getReason() !== Exception::REASON_UNIQUE_CONSTRAINT_VIOLATION) {
                    throw $e;
                }
            }

            // Move old flat preview to new nested directory format.
            // The nested layout is seven single hex-digit directories followed by the numeric
            // file id; the character class must include "f", otherwise already-migrated
            // previews with an "f" in their path are treated as legacy and moved again.
            $dirName = str_replace($previewRoot, '', $item['filePath']);
            if (preg_match('#(?:[0-9a-f]/){7}[0-9]+#', $dirName) !== 1) {
                $previewPath = $this->constructPath($preview);
                $this->createParentFiles($previewPath);
                $ok = rename($item['realPath'], $previewPath);
                if (!$ok) {
                    throw new LogicException('Failed to move ' . $item['realPath'] . ' to ' . $previewPath);
                }
                // Mark the destination so the outer iterator skips it if it encounters the path later.
                $skipPaths[$previewPath] = true;
            }

            $previewsFound++;
        }
    }

    return $previewsFound;
}
201302
/**
 * Bulk-fetch filecache rows for a set of fileIds.
 *
 * The ids are queried in chunks of 1000 to keep the IN() list bounded. Every
 * chunk is executed as its own query and the results are merged; putting all
 * chunks on one query with andWhere() would AND disjoint IN() lists together
 * and match nothing as soon as there is more than one chunk.
 *
 * @param int[] $fileIds
 * @return array<int, array<string, mixed>> Rows (fileid, storage, etag, mimetype) keyed by fileid;
 *                                          ids without a filecache row are absent.
 */
private function fetchFilecacheByFileIds(array $fileIds): array {
    if ($fileIds === []) {
        return [];
    }

    $result = [];
    foreach (array_chunk($fileIds, 1000) as $chunk) {
        $qb = $this->connection->getQueryBuilder();
        $qb->select('fileid', 'storage', 'etag', 'mimetype')
            ->from('filecache')
            ->where($qb->expr()->in('fileid', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_INT_ARRAY)));

        // The storage id cannot be derived from the preview name, so every shard has to be asked.
        $rows = $qb->runAcrossAllShards()
            ->executeQuery();
        try {
            while (($row = $rows->fetchAssociative()) !== false) {
                $result[(int)$row['fileid']] = $row;
            }
        } finally {
            // Release the cursor even if fetching fails half-way.
            $rows->closeCursor();
        }
    }

    return $result;
}
330+
/**
 * Bulk-fetch filecache rows for a set of path_hashes (legacy migration).
 *
 * The hashes are queried in chunks of 1000 to keep the IN() list bounded.
 * Every chunk is executed as its own query and the results are merged;
 * putting all chunks on one query with andWhere() would AND disjoint IN()
 * lists together and match nothing as soon as there is more than one chunk.
 *
 * @param string[] $pathHashes
 * @return array<string, array<string, mixed>> Rows (fileid, storage, etag, mimetype, parent, path_hash)
 *                                             keyed by path_hash; hashes without a row are absent.
 */
private function fetchFilecacheByPathHashes(array $pathHashes): array {
    if ($pathHashes === []) {
        return [];
    }

    $result = [];
    foreach (array_chunk($pathHashes, 1000) as $chunk) {
        $qb = $this->connection->getQueryBuilder();
        $qb->select('fileid', 'storage', 'etag', 'mimetype', 'parent', 'path_hash')
            ->from('filecache')
            ->where($qb->expr()->in('path_hash', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_STR_ARRAY)));

        $rows = $qb->runAcrossAllShards()
            ->executeQuery();
        try {
            while (($row = $rows->fetchAssociative()) !== false) {
                $result[$row['path_hash']] = $row;
            }
        } finally {
            // Release the cursor even if fetching fails half-way.
            $rows->closeCursor();
        }
    }

    return $result;
}
358+
202359 /**
203360 * Recursive method that deletes the folder and its parent folders if it's not
204361 * empty.
@@ -210,10 +367,11 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
210367 ->where ($ qb ->expr ()->eq ('parent ' , $ qb ->createNamedParameter ($ folderId )))
211368 ->setMaxResults (1 )
212369 ->runAcrossAllShards ()
213- ->executeQuery ()
214- ->fetchAssociative ();
370+ ->executeQuery ();
371+ $ row = $ result ->fetchAssociative ();
372+ $ result ->closeCursor ();
215373
216- if ($ result !== false ) {
374+ if ($ row !== false ) {
217375 // there are other files in the directory, don't delete yet
218376 return ;
219377 }
@@ -225,11 +383,11 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
225383 ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ folderId )))
226384 ->andWhere ($ qb ->expr ()->eq ('storage ' , $ qb ->createNamedParameter ($ storageId )))
227385 ->setMaxResults (1 )
228- ->executeQuery ()
229- ->fetchAssociative ();
230-
231- if ($ result !== false ) {
232- $ parentFolderId = (int )$ result ['parent ' ];
386+ ->executeQuery ();
387+ $ row = $ result ->fetchAssociative ();
388+ $ result -> closeCursor ();
389+ if ($ row !== false ) {
390+ $ parentFolderId = (int )$ row ['parent ' ];
233391
234392 $ qb = $ this ->connection ->getQueryBuilder ();
235393 $ qb ->delete ('filecache ' )
0 commit comments