119 lines
3.4 KiB
Python
119 lines
3.4 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
REPL-friendly single-module Python script: import it for interactive use or execute it on its own.
|
|
"""
|
|
|
|
import os
|
|
from dataclasses import dataclass
|
|
from itertools import groupby
|
|
from typing import Iterable
|
|
|
|
# Directory scanned by default: taken from the MAC_PHOTOS_PATH environment
# variable, falling back to the relative path "mac_photos" when it is unset.
mac_photos = os.environ.get("MAC_PHOTOS_PATH", "mac_photos")
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class PhotosKExt:
|
|
name: str
|
|
parent: tuple[str, ...]
|
|
exts_lowered: tuple[str, ...]
|
|
|
|
@classmethod
|
|
def from_root_file(cls, root: str, filename: str):
|
|
ext_elems = filename.split('.')
|
|
return cls(
|
|
name=ext_elems[0],
|
|
parent=os.path.split(root),
|
|
exts_lowered=tuple(
|
|
ext.lower() for ext in ext_elems[1:]
|
|
)
|
|
)
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
class PhotosKey:
    """Immutable, hashable pair of a file stem and its parent path tuple."""

    # Stem of the file; ``from_key_ext`` fills it from ``PhotosKExt.name``.
    filename: str
    # Parent path tuple; ``from_key_ext`` fills it from ``PhotosKExt.parent``.
    parent: tuple[str, ...]

    @classmethod
    def from_key_ext(cls, ext: PhotosKExt):
        """Create the key for *ext*, leaving its extension tuple behind.

        Args:
            ext: Record whose ``name`` and ``parent`` are copied.

        Returns:
            A new ``PhotosKey``.
        """
        stem, parent = ext.name, ext.parent
        return cls(stem, parent)
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class PhotosValue:
|
|
exts: dict[str, list[str]]
|
|
|
|
def heic_paths(self) -> list[str]:
|
|
return self.exts.get("heic", list())
|
|
|
|
def mov_paths(self) -> list[str]:
|
|
return self.exts.get("mov", list())
|
|
|
|
|
|
def candidate_exts(file_exts: Iterable[PhotosKExt]):
    """Index the given records by each of their lower-cased extensions.

    A record is appended once for every entry of its ``exts_lowered``, so a
    file with several dotted suffixes is listed under each of them.

    Args:
        file_exts: Records to index.

    Returns:
        A dict mapping extension -> list of records carrying that extension.

    Counting the lists of one recorded run gave, among others::

        >>> {ext: len(files) for ext, files in cexts.items()}
        stdout> {... 'mov': 2769, 'heic': 2354 ...}

    We could make a bold guess that iPhone 11 stores both `heic` and `mov`
    for materialized best capture and live movie for live capture.
    """
    by_ext: dict[str, list[PhotosKExt]] = {}

    for record in file_exts:
        for suffix in record.exts_lowered:
            # Open a new bucket the first time a suffix is encountered.
            if suffix not in by_ext:
                by_ext[suffix] = []
            by_ext[suffix].append(record)

    return by_ext
|
|
|
|
|
|
def file_exts(photos_dir: str = mac_photos):
    """Collect a ``PhotosKExt`` for every file found below *photos_dir*.

    The tree is traversed with ``os.walk`` and symbolic links to directories
    are followed.

    Args:
        photos_dir: Directory to scan; defaults to the module-level
            ``mac_photos`` value.

    Returns:
        A set of ``PhotosKExt`` records (equal records collapse into one).
    """
    found = set()
    for root, _dirs, names in os.walk(photos_dir, followlinks=True):
        for filename in names:
            found.add(PhotosKExt.from_root_file(root, filename))
    return found
|
|
|
|
|
|
def photos_exts(
    files_with_ext: Iterable["PhotosKExt"],
) -> dict[tuple[str, tuple[str, ...]], list["PhotosKExt"]]:
    """Group records that share the same ``(name, parent)`` pair.

    The records are accumulated straight into a dict instead of going through
    ``itertools.groupby``: ``groupby`` only merges *consecutive* items with an
    equal key, so on unsorted input (for example a set) records with the same
    key that are not adjacent would end up in separate groups, and building a
    dict from those groups would keep only the last one.

    Args:
        files_with_ext: Records to group, in any order.

    Returns:
        A dict mapping ``(name, parent)`` to the list of all records with
        that key, in the order they were encountered.
    """
    grouped: dict[tuple[str, tuple[str, ...]], list["PhotosKExt"]] = {}
    for record in files_with_ext:
        grouped.setdefault((record.name, record.parent), []).append(record)
    return grouped
|
|
|
|
|
|
def main():
    """Scan the photo directory and print extension statistics.

    Prints, in order: the ``photos_exts`` grouping, the ``candidate_exts``
    index, the number of files per extension, and finally
    ``len(heic_names - mov_names) - len(heic_names)`` computed on lower-cased
    names.
    """
    fexts = file_exts()
    cexts = candidate_exts(fexts)
    print(f"{photos_exts(fexts)=}")
    print(f"{cexts=}")

    ext_counts = {ext: len(files) for ext, files in cexts.items()}
    print(ext_counts)
    # stdout>
    # {'jpeg': 6023, 'mov': 2769, 'heic': 2354, 'thm': 197, 'png': 168, 'plist': 163,
    # 'log': 1, 'aae': 102, 'kgdb': 3, 'mp4': 26, 'data': 3, 'db': 3, 'xml': 26,
    # 'cmap': 2, 'plj': 16, 'kgdb-wal': 3, 'sqlite-shm': 11, 'cloudphotodb-wal': 1,
    # 'kgdb-shm': 3, '00001]': 1, 'frag': 2, 'ithmb': 3, 'sqlite': 15, 'lock': 1,
    # 'sqlite-wal': 11, 'aoi': 3, '0': 1,
    # 'm3u8-8f37dbfb-b3a6-4d52-beca-d17aaed01606': 2, 'jpg': 2, 'roi': 3, 'poi': 3,
    # 'db-shm': 1, 'm3u8-37f64716-0b2d-4a82-854a-5a6c78ce505a': 1, 'descriptor': 3,
    # 'bin': 2, 'm3u8': 5, 'cloudphotodb': 1, 'db-wal': 1, 'nature': 3,
    # 'm3u8-d8faad08-4fcc-4161-a600-1562d755c97b': 1, 'initfrag': 2, '20201]': 1,
    # 'cloudphotodb-shm': 1}

    # ``.get`` instead of indexing: when the scan finds no `mov` or `heic`
    # files the key is absent, and indexing would abort with KeyError.
    movs = cexts.get('mov', [])
    heics = cexts.get('heic', [])
    heic_name_set = {heic.name.lower() for heic in heics}
    mov_name_set = {mov.name.lower() for mov in movs}
    # The printed value is minus the number of heic names that also occur
    # among the mov names.
    print(len(heic_name_set - mov_name_set) - len(heic_name_set))
    # stdout> (not 0)
    # Hence, we are not able to match `.heic` with its `.mov` with just names
    # Could it be that the `.heic` also contains the high-quality live?
|
|
|
|
|
|
# Run the report only when the file is executed as a script; importing the
# module (for example from a REPL) defines everything without scanning.
if __name__ == "__main__":
    main()
|