Coverage for lobster/common/file_collector.py: 67%

38 statements  

« prev     ^ index     » next       coverage.py v7.10.5, created at 2025-08-27 13:02 +0000

1import os.path 

2from pathlib import Path 

3from re import Pattern 

4from typing import Iterable, List 

5 

6from lobster.common.errors import PathError 

7 

8 

class FileCollector:
    """Collect file paths whose extension is in a configured allow-list.

    Files can be added one at a time with ``add_file`` or in bulk with
    ``add_dir_recursively``. An empty extension list means "accept every
    file". Extension matching is case-insensitive.
    """

    def __init__(
        self,
        extensions: Iterable[str],
        directory_exclude_patterns: Iterable[Pattern],
    ) -> None:
        """Validate and store the filter configuration.

        Args:
            extensions: Accepted file extensions, each starting with a
                dot (e.g. ``".py"``). An empty iterable accepts all files.
            directory_exclude_patterns: Compiled patterns; a directory
                whose name matches any of them (via ``Pattern.match``) is
                not descended into. ``None`` is treated as "no patterns".

        Raises:
            ValueError: If ``extensions`` is ``None`` or contains an entry
                that does not start with a dot.
        """
        if extensions is None:
            raise ValueError("'extensions' must not be None")
        if directory_exclude_patterns is None:
            directory_exclude_patterns = []
        # Both arguments are only promised to be iterables, but they are
        # traversed many times (validation, every file check, every
        # directory check). Materialise them so that one-shot iterators
        # such as generators are not silently exhausted after first use.
        self._extensions = tuple(extensions)
        self._files: List[str] = []
        self._directory_exclude_patterns = tuple(directory_exclude_patterns)
        for ext in self._extensions:
            if not ext.startswith("."):
                raise ValueError(f"Extension '{ext}' must start with a dot (.)")
        # File suffixes are lowercased before comparison, so the configured
        # extensions must be lowercased as well or ".TXT" could never match.
        # The original spellings are kept in _extensions for error messages.
        self._normalized_extensions = frozenset(
            ext.lower() for ext in self._extensions
        )

    @property
    def files(self) -> List[str]:
        """Return the list of collected file paths, in insertion order."""
        return self._files

    def add_file(self, file: str, throw_on_mismatch: bool) -> None:
        """Add ``file`` to the collection if its extension is accepted.

        Args:
            file: Path of the candidate file.
            throw_on_mismatch: When true, a rejected file raises instead
                of being skipped silently.

        Raises:
            PathError: If the extension is not accepted and
                ``throw_on_mismatch`` is true.
        """
        if self._is_file_of_interest(file):
            self._files.append(file)
        elif throw_on_mismatch:
            raise PathError(
                f"File {file} does not have a valid extension. "
                f"Expected one of {', '.join(self._extensions)}."
            )

    def _is_file_of_interest(self, file: str) -> bool:
        """Return True if ``file`` passes the extension filter."""
        if not self._extensions:
            # No extensions configured: every file is accepted.
            return True
        suffix = os.path.splitext(file)[1].lower()
        return suffix in self._normalized_extensions

    def _is_dir_of_interest(self, dir_name: str) -> bool:
        """Return True unless ``dir_name`` matches an exclude pattern."""
        return not any(pattern.match(dir_name)
                       for pattern in self._directory_exclude_patterns)

    def add_dir_recursively(self, dir_path: str) -> None:
        """Recursively adds files from a directory, filtering by extensions.

        Files with a non-matching extension are skipped silently.
        Sub-directories whose name matches an exclude pattern are not
        entered. Collected paths use forward slashes (``Path.as_posix``).
        """
        def walk_directory(path: Path) -> None:
            for item in path.iterdir():
                if item.is_file():
                    self.add_file(item.as_posix(), throw_on_mismatch=False)
                elif item.is_dir() and self._is_dir_of_interest(item.name):
                    walk_directory(item)

        walk_directory(Path(dir_path))