From b9c193d45f6706c34aa228eff7e26f67959329b0 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 17 Feb 2024 23:47:30 +0100 Subject: [PATCH] Implemented image processor namespaces --- src/formatitko/context.py | 4 + src/formatitko/formatitko.py | 4 +- src/formatitko/html_generator.py | 30 +++--- src/formatitko/images.py | 171 +++++++++++++++++++++++-------- src/formatitko/tex_generator.py | 20 ++-- 5 files changed, 163 insertions(+), 66 deletions(-) diff --git a/src/formatitko/context.py b/src/formatitko/context.py index caa321c..31c1587 100644 --- a/src/formatitko/context.py +++ b/src/formatitko/context.py @@ -28,6 +28,8 @@ class Context: path: str dir: str filename: str + root_dir: str # Absolute path to the dir of the file formátítko was called on + rel_dir: str # Relative path to the current dir from the root dir def __init__(self, doc: Doc, path: str, parent: Union['Context', None]=None, trusted: bool=True): self.parent = parent @@ -37,6 +39,8 @@ class Context: self.path = path self.dir = os.path.dirname(path) if os.path.dirname(path) != "" else "." self.filename = os.path.basename(path) + self.root_dir = parent.root_dir if parent else os.path.abspath(self.dir) + self.rel_dir = os.path.relpath(self.dir, self.root_dir) if self.get_metadata("flags", immediate=True) is None: self.set_metadata("flags", {}) diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index 139d69a..486c5a4 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -10,7 +10,7 @@ import shutil from .util import import_md from .context import Context, BlockGroup from .katex import KatexClient -from .images import ImageProcessor +from .images import ImageProcessor, ImageProcessorNamespace from .output_generator import OutputGenerator, FormatitkoRecursiveError from .html_generator import HTMLGenerator from .transform_processor import TransformProcessor @@ -62,7 +62,7 @@ def main(): e.pretty_print() # Initialize the image processor (this just keeps some basic state) - imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs) + imageProcessor = ImageProcessor({"": ImageProcessorNamespace(args.img_public_dir, args.img_web_path, args.img_cache_dir, args.img_lookup_dirs, True)}) if args.output_html is not None: # Initialize KaTeX client (this runs the node app and connects to a unix socket) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index d564044..54002ce 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -17,7 +17,7 @@ from .whitespace import NBSP from .context import Group, BlockGroup, InlineGroup from .output_generator import OutputGenerator from .katex import KatexClient -from .images import ImageProcessor +from .images import ImageProcessor, ImageProcessorNamespaceSearcher from .util import inlinify class HTMLGenerator(OutputGenerator): @@ -137,8 +137,12 @@ class HTMLGenerator(OutputGenerator): additional_args = self.get_image_processor_args(e.attributes) - # The directory of the current file, will also look for images there. + # The directory of the current file relative to the current working directory source_dir = self.context.dir + # The directory of the current file relative to the md file we were called on + rel_dir = self.context.rel_dir + + searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir) _, ext = os.path.splitext(url) ext = ext[1:] @@ -148,16 +152,16 @@ class HTMLGenerator(OutputGenerator): # Even supported elements have to be 'converted' because the # processing contains finding and moving them to the output # directory. - url = self.imageProcessor.process_image(url, ext, source_dir, **additional_args) + url = self.imageProcessor.process_image(url, ext, searcher, **additional_args) elif ext in ["pdf", "epdf","asy"]: # Only relevant for when these were PNGs, leaving this here for future reference. # if not "dpi" in additional_args: # additional_args["dpi"] = 300 - url = self.imageProcessor.process_image(url, "svg", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "svg", searcher, **additional_args) elif ext in ["jpg"]: - url = self.imageProcessor.process_image(url, "jpeg", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args) else: - url = self.imageProcessor.process_image(url, "png", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "png", searcher, **additional_args) # Srcset generation - multiple alternative sizes of images browsers can # choose from. @@ -168,19 +172,19 @@ class HTMLGenerator(OutputGenerator): # This is inspired by @vojta001's blogPhoto shortcode he made for # patek.cz: # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html - width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir]) + width, height = self.imageProcessor.get_image_size(searcher.find_image_in_dir(url, searcher.get_cache_dir())) sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality) for size in sizes: if width <= size[0] and height <= size[1]: - srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w')) + srcset.append((f'{searcher.get_web_path()}/{url}', f'{width}w')) break quality = size[2] if ext == "jpeg" else None - cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=size[0], height=size[1], quality=quality) - self.imageProcessor.publish_image(cache_img) - srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{size[0]}w')) + cache_img = self.imageProcessor.process_image(url, ext, searcher.get_cache_searcher(), width=size[0], height=size[1], quality=quality) + searcher.publish_image(cache_img) + srcset.append((f'{searcher.get_web_path()}/{cache_img}', f'{size[0]}w')) - self.imageProcessor.publish_image(url) - url = self.imageProcessor.web_path + "/" + url + searcher.publish_image(url) + url = searcher.get_web_path() + "/" + url attributes = self.common_attributes(e) if "width" in e.attributes: diff --git a/src/formatitko/images.py b/src/formatitko/images.py index 73220dc..0f65b7b 100644 --- a/src/formatitko/images.py +++ b/src/formatitko/images.py @@ -4,46 +4,161 @@ import shutil import subprocess from PIL import Image + class FileInWrongDirError(Exception): pass + class ConversionProgramError(Exception): pass + class InkscapeError(ConversionProgramError): pass + class ImageMagickError(ConversionProgramError): pass + class AsyError(ConversionProgramError): pass - -class ImageProcessor: +class ImageProcessorNamespace: public_dir: str cache_dir: str lookup_dirs: list[str] web_path: str + include_src: bool - def __init__(self, public_dir: str, web_path: str, cache_dir: str, *lookup_dirs: list[str]): + def __init__(self, public_dir: str, web_path: str, cache_dir: str, lookup_dirs: list[str], include_src: bool): self.public_dir = public_dir self.cache_dir = cache_dir self.lookup_dirs = lookup_dirs self.web_path = web_path if web_path[-1] != "/" else web_path[:-1] + self.include_src = include_src if not os.path.exists(self.public_dir): os.mkdir(self.public_dir) if not os.path.exists(self.cache_dir): os.mkdir(self.cache_dir) - def process_image(self, input_filename: str, format: str, source_dir: str, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str: + +class ImageProcessorSearcher: + def get_lookup_dirs(self) -> list[str]: + return [] + + def get_cache_dir(self) -> str: + return "" + + def get_public_dir(self) -> str: + return "" + + def get_web_path(self) -> str: + return "" + + def find_image_in_dir(self, input_filename: str, dir: str) -> Union[str, None]: + if os.path.isfile(dir + "/" + input_filename): + return dir + "/" + input_filename + else: + return None + + def find_image(self, input_filename: str) -> Union[str, None]: + for dir in self.get_lookup_dirs(): + image = self.find_image_in_dir(input_filename, dir) + if image: + return image + return None + + def publish_image(self, target_name, relative: bool=True) -> str: + cache_path = self.get_cache_dir() + "/" + target_name + if not os.path.isfile(cache_path): + raise FileNotFoundError(f'Image {target_name} not cached') + target_path = self.get_public_dir() + "/" + target_name + try: + if os.path.exists(target_path): + if os.path.getmtime(cache_path) > os.path.getmtime(target_path): + os.remove(target_path) + os.link(cache_path, target_path) + else: + os.link(cache_path, target_path) + except OSError as e: + if e.errno == 18: # Invalid cross-device link: cache and public dirs are on different devices, don't hardlink, copy + shutil.copyfile(cache_path, target_path) + else: + raise e + return target_name if relative else target_path + + + +class ImageProcessorCacheSearcher(ImageProcessorSearcher): + cache_dir: str + + def __init__(self, cache_dir: str): + self.cache_dir = cache_dir + + def get_lookup_dirs(self) -> list[str]: + return [self.cache_dir] + + def get_cache_dir(self) -> str: + return self.cache_dir + + def get_public_dir(self) -> str: + return "" + + def get_web_path(self) -> str: + return "" + + def publish_image(self, target_name, relative: bool=True) -> str: + raise NotImplementedError(); + +class ImageProcessorNamespaceSearcher(ImageProcessorSearcher): + namespace: ImageProcessorNamespace + rel_dir: str + source_dir: str + + def __init__(self, namespace: ImageProcessorNamespace, rel_dir: str, source_dir: str): + self.namespace = namespace + self.rel_dir = rel_dir + self.source_dir = source_dir + + def get_lookup_dirs(self) -> list[str]: + return self.namespace.lookup_dirs + ([self.source_dir] if self.namespace.include_src else []) + + def transform_path(self, path: str) -> str: + return path.replace("$dir", self.rel_dir) + + def get_cache_dir(self) -> str: + return self.transform_path(self.namespace.cache_dir) + + def get_public_dir(self) -> str: + return self.transform_path(self.namespace.public_dir) + + def get_web_path(self) -> str: + return self.transform_path(self.namespace.web_path) + + def get_cache_searcher(self) -> ImageProcessorCacheSearcher: + return ImageProcessorCacheSearcher(self.get_cache_dir()) + +class ImageProcessor: + namespaces: dict[str, ImageProcessorNamespace] + + def __init__(self, namespaces: dict[str, ImageProcessorNamespace]): + self.namespaces = namespaces + + def get_namespace_by_path(self, path: str) -> ImageProcessorNamespace: + return self.namespaces[path.split(":")[0] if ":" in path else ""] + + def get_searcher_by_path(self, path: str, rel_dir: str, source_dir: str) -> ImageProcessorNamespaceSearcher: + return ImageProcessorNamespaceSearcher(self.get_namespace_by_path(path), rel_dir, source_dir) + + def process_image(self, input_filename: str, format: str, searcher: ImageProcessorSearcher, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str: name = os.path.basename(input_filename) base, ext = os.path.splitext(name) ext = ext[1:] - full_path = self.find_image(input_filename, [source_dir]) + full_path = searcher.find_image(input_filename) if full_path is None: - raise FileNotFoundError(f'Image {input_filename} not found in {self.lookup_dirs} or {source_dir}.') + raise FileNotFoundError(f'Image {input_filename} not found in {searcher.get_lookup_dirs()}.') if format == "jpg": format = "jpeg" @@ -51,7 +166,7 @@ class ImageProcessor: # Locate all dependencies deps_full = [full_path] for dep in deps: - dep_full_path = self.find_image(dep, [source_dir]) + dep_full_path = searcher.find_image(dep) if dep_full_path is None: raise FileNotFoundError(f'Image dependency {dep} not found.') deps_full.append(dep_full_path) @@ -65,7 +180,7 @@ class ImageProcessor: if quality is not None: suffix += f'_q{quality}' target_name = base+suffix+"."+format - target_path = self.cache_dir + "/" + target_name + target_path = searcher.get_cache_dir() + "/" + target_name # Only regenerate if the file doesn't already exist and no dependencies are newer if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full): @@ -80,13 +195,13 @@ class ImageProcessor: # Try to find the converted filename in lookup_dirs, if you find # it, don't convert, just copy. - elif self.find_image(target_name, [source_dir]) is not None and not self.is_outdated(self.find_image(target_name, [source_dir]), deps): - shutil.copyfile(self.find_image(target_name, [source_dir]), target_path) + elif searcher.find_image(target_name) is not None and not self.is_outdated(searcher.find_image(target_name), deps): + shutil.copyfile(searcher.find_image(target_name), target_path) # Process asymptote elif ext == "asy": # Collect dependencies - deps_dir = self.cache_dir + "/" + name + "_deps" + deps_dir = searcher.get_cache_dir() + "/" + name + "_deps" if not os.path.isdir(deps_dir): os.mkdir(deps_dir) for dep_full in deps_full: @@ -96,7 +211,7 @@ class ImageProcessor: dpi_arg = ['-render', str(dpi/72)] if dpi is not None else [] if subprocess.run(['asy', name, '-o', target_name, '-f', format, *dpi_arg], cwd=deps_dir).returncode != 0: raise AsyError(f"Could not convert '{full_path}' to '{format}'") - shutil.move(deps_dir + "/" + target_name, self.cache_dir + "/" + target_name) + shutil.move(deps_dir + "/" + target_name, searcher.get_cache_dir() + "/" + target_name) # Convert SVGs using inkscape elif ext == "svg": @@ -124,37 +239,7 @@ class ImageProcessor: return True return False - def publish_image(self, target_name, relative: bool=True) -> str: - cache_path = self.cache_dir + "/" + target_name - if not os.path.isfile(cache_path): - raise FileNotFoundError(f'Image {target_name} not cached') - target_path = self.public_dir + "/" + target_name - try: - if os.path.exists(target_path): - if os.path.getmtime(cache_path) > os.path.getmtime(target_path): - os.remove(target_path) - os.link(cache_path, target_path) - else: - os.link(cache_path, target_path) - except OSError as e: - if e.errno == 18: # Invalid cross-device link: cache and public dirs are on different devices, don't hardlink, copy - shutil.copyfile(cache_path, target_path) - else: - raise e - return target_name if relative else target_path - - def get_image_size(self, input_filename: str, additional_dirs: list[str]=[]) -> tuple[int, int]: - full_path = self.find_image(input_filename, additional_dirs) - if full_path is None: - raise FileNotFoundError(f'Image {input_filename} not found.') + def get_image_size(self, full_path: str) -> tuple[int, int]: # Getting image size using ImageMagick is slow. VERY return Image.open(full_path).size - - def find_image(self, input_filename: str, additional_dirs: list[str]=[]) -> Union[str, None]: - for dir in [*self.lookup_dirs, *additional_dirs]: - if os.path.isfile(dir + "/" + input_filename): - return dir + "/" + input_filename - - - diff --git a/src/formatitko/tex_generator.py b/src/formatitko/tex_generator.py index c97b90d..570478e 100644 --- a/src/formatitko/tex_generator.py +++ b/src/formatitko/tex_generator.py @@ -8,7 +8,7 @@ from typing import Union import os from .output_generator import OutputGenerator -from .images import ImageProcessor +from .images import ImageProcessor, ImageProcessorNamespaceSearcher from .whitespace import NBSP from .elements import FQuoted @@ -111,8 +111,12 @@ class UCWTexGenerator(OutputGenerator): additional_args = self.get_image_processor_args(e.attributes) - # The directory of the current file, will also look for images there. + # The directory of the current file relative to the current working directory source_dir = self.context.dir + # The directory of the current file relative to the md file we were called on + rel_dir = self.context.rel_dir + + searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir) _, ext = os.path.splitext(url) ext = ext[1:] @@ -122,17 +126,17 @@ class UCWTexGenerator(OutputGenerator): # Even supported elements have to be 'converted' because the # processing contains finding and moving them to the cache # directory. - url = self.imageProcessor.process_image(url, ext, source_dir, **additional_args) + url = self.imageProcessor.process_image(url, ext, searcher, **additional_args) elif ext in ["svg"]: # FIXME - url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args) elif ext in ["epdf"]: - url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args) elif ext in ["jpg"]: - url = self.imageProcessor.process_image(url, "jpeg", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args) else: - url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args) - url = self.imageProcessor.find_image(url, [self.imageProcessor.cache_dir]) + url = searcher.get_cache_searcher().find_image(url) width = "" if "width" in e.attributes: width = e.attributes["width"]