dotfiles/dev/pixi/repl/apple_photos.py

119 lines
3.4 KiB
Python

#!/usr/bin/env python
"""
REPL-friendly, single-file Python module that can be imported or run as a script.
"""
import os
from dataclasses import dataclass
from itertools import groupby
from typing import Iterable
# Directory scanned by default; the MAC_PHOTOS_PATH environment variable
# overrides the relative fallback "mac_photos".
mac_photos = os.environ.get("MAC_PHOTOS_PATH", "mac_photos")
@dataclass(slots=True, frozen=True)
class PhotosKExt:
name: str
parent: tuple[str, ...]
exts_lowered: tuple[str, ...]
@classmethod
def from_root_file(cls, root: str, filename: str):
ext_elems = filename.split('.')
return cls(
name=ext_elems[0],
parent=os.path.split(root),
exts_lowered=tuple(
ext.lower() for ext in ext_elems[1:]
)
)
@dataclass(slots=True, frozen=True)
class PhotosKey:
    """Extension-independent identity of a file: its stem plus its directory.

    Frozen, hence hashable and usable as a dictionary key.
    """

    # Stem of the file name (no extensions).
    filename: str
    # Directory pair copied from the originating ``PhotosKExt.parent``.
    parent: tuple[str, ...]

    @classmethod
    def from_key_ext(cls, ext: PhotosKExt):
        """Drop the extension information from *ext* and keep name and parent."""
        return cls(ext.name, ext.parent)
@dataclass(slots=True)
class PhotosValue:
exts: dict[str, list[str]]
def heic_paths(self) -> list[str]:
return self.exts.get("heic", list())
def mov_paths(self) -> list[str]:
return self.exts.get("mov", list())
def candidate_exts(file_exts: Iterable[PhotosKExt]):
    """Index files by every one of their lower-cased extensions.

    A file with several extensions is listed once under each of them.

    Recorded REPL observation from the author's library:

    >>> {ext: len(files) for ext, files in cexts.items()}
    stdout> {... 'mov': 2769, 'heic': 2354 ...}

    We could make a bold guess that iPhone 11 stores both `heic` and `mov`
    for materialized best capture and live movie for live capture.

    Args:
        file_exts: Files to index.

    Returns:
        A dict mapping each extension to the list of files carrying it, in
        iteration order.
    """
    by_ext: dict[str, list[PhotosKExt]] = {}
    for entry in file_exts:
        for suffix in entry.exts_lowered:
            if suffix not in by_ext:
                by_ext[suffix] = []
            by_ext[suffix].append(entry)
    return by_ext
def file_exts(photos_dir: str = mac_photos):
    """Collect a ``PhotosKExt`` for every file found below *photos_dir*.

    Args:
        photos_dir: Root directory to walk; defaults to ``mac_photos``.

    Returns:
        A set of ``PhotosKExt`` records, one per file encountered.
    """
    found = set()
    # followlinks=True: symlinked directories are descended into as well.
    for root, _dirs, files in os.walk(photos_dir, followlinks=True):
        for file in files:
            found.add(PhotosKExt.from_root_file(root, file))
    return found
def photos_exts(files_with_ext: Iterable["PhotosKExt"]):
    """Group files that share the same ``(name, parent)`` pair.

    ``itertools.groupby`` only merges *consecutive* items with equal keys,
    so the input is sorted by the grouping key first. Without the sort, an
    unordered input (such as the set returned by ``file_exts``) produces
    several runs per key and all but the last run would be lost when the
    dict is built.

    Args:
        files_with_ext: Records exposing ``name`` and ``parent`` attributes.

    Returns:
        A dict mapping ``(name, parent)`` to the list of records with that
        key. Records within a group keep their relative input order because
        ``sorted`` is stable.
    """

    def group_key(item):
        return (item.name, item.parent)

    ordered = sorted(files_with_ext, key=group_key)
    return {key: list(members) for key, members in groupby(ordered, key=group_key)}
def main():
    """Scan the default photos directory and print exploratory statistics.

    Prints, in order: the files grouped by ``(name, parent)``, the files
    indexed by extension, the number of files per extension, and a figure
    describing how many HEIC stems also occur among the MOV stems.
    """
    fexts = file_exts()
    cexts = candidate_exts(fexts)
    print(f"{photos_exts(fexts)=}")
    print(f"{cexts=}")
    ext_counts = {ext: len(files) for ext, files in cexts.items()}
    print(ext_counts)
    # Recorded stdout of the line above from one run on the author's library:
    #
    # {'jpeg': 6023, 'mov': 2769, 'heic': 2354, 'thm': 197, 'png': 168, 'plist': 163,
    # 'log': 1, 'aae': 102, 'kgdb': 3, 'mp4': 26, 'data': 3, 'db': 3, 'xml': 26,
    # 'cmap': 2, 'plj': 16, 'kgdb-wal': 3, 'sqlite-shm': 11, 'cloudphotodb-wal': 1,
    # 'kgdb-shm': 3, '00001]': 1, 'frag': 2, 'ithmb': 3, 'sqlite': 15, 'lock': 1,
    # 'sqlite-wal': 11, 'aoi': 3, '0': 1,
    # 'm3u8-8f37dbfb-b3a6-4d52-beca-d17aaed01606': 2, 'jpg': 2, 'roi': 3, 'poi': 3,
    # 'db-shm': 1, 'm3u8-37f64716-0b2d-4a82-854a-5a6c78ce505a': 1, 'descriptor': 3,
    # 'bin': 2, 'm3u8': 5, 'cloudphotodb': 1, 'db-wal': 1, 'nature': 3,
    # 'm3u8-d8faad08-4fcc-4161-a600-1562d755c97b': 1, 'initfrag': 2, '20201]': 1,
    # 'cloudphotodb-shm': 1}

    # An empty or missing directory yields no 'mov'/'heic' entries at all;
    # fall back to empty lists instead of failing with KeyError.
    movs = cexts.get('mov', [])
    heics = cexts.get('heic', [])
    heic_name_set = {heic.name.lower() for heic in heics}
    mov_name_set = {mov.name.lower() for mov in movs}
    # len(A - B) - len(A) == -len(A & B): the printed value is minus the
    # number of HEIC stems that also appear as a MOV stem.
    print(len(heic_name_set - mov_name_set) - len(heic_name_set))
    # stdout> (not 0)
    # Hence, we are not able to match `.heic` with its `.mov` with just names
    # Could it be that the `.heic` also contains the high-quality live?
    # NOTE(review): a non-zero value means at least some stems ARE shared;
    # the conclusion above should be re-checked against the intended metric
    # (e.g. len(heic_name_set - mov_name_set) for the unmatched HEIC stems).
# Script entry point: run the exploration only when this file is executed
# directly, not when it is imported (e.g. from a REPL).
if __name__ == "__main__":
    main()