From 166251ad713db6d7f906f7593952c92dce33da5a Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Wed, 19 Jul 2023 19:16:14 +0200 Subject: [PATCH] =?UTF-8?q?=C4=8C=C3=A1ste=C4=8Dn=C4=9B=20p=C5=99ed=C4=9Bl?= =?UTF-8?q?=C3=A1n=20image=20processing,=20resolved=20#4,=20#6.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/formatitko/formatitko.py | 7 ++-- src/formatitko/html_generator.py | 18 +++------ src/formatitko/images.py | 61 +++++++++++++++++++++++++----- src/formatitko/output_generator.py | 16 ++++++++ src/formatitko/tex.py | 14 ++++--- 5 files changed, 85 insertions(+), 31 deletions(-) diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index 0456240..1d0d3ca 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -20,7 +20,8 @@ def main(): # Initialize command line arguments parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. Always looks for images in the same folder as the markdown file.", nargs="+", default=[]) - parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", default="public") + parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will overwrite images, whose dependencies are newer.", default="public") + parser.add_argument("-c", "--img-cache-dir", help="Directory to cache processed images and intermediate products. The program will overwrite files, whose dependencies are newer.", default="cache") parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/") parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", default="output.html") parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", default="output.tex") @@ -49,14 +50,14 @@ def main(): doc.content = [Group(*doc.content, metadata={"language":language})] # Initialize the image processor (this just keeps some basic state) - imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, *args.img_lookup_dirs) + imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs) # Initialize KaTeX client (this runs the node app and connects to a unix socket) with KatexClient() as katexClient: # Generate HTML and TeX out of the transformed document #open(args.output_html, "w").write(html(doc, katexClient, imageProcessor)) #open(args.output_tex, "w").write(tex(doc, imageProcessor)) - OutputGenerator(sys.stdout).generate(doc) + HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc) if args.debug: print(show(doc)) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index ef6c9a7..32e5b2c 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -127,16 +127,7 @@ class HTMLGenerator(OutputGenerator): def generate_Image(self, e: Image): url = e.url - # Attributes → image processor args - additional_args = {} - if "file-width" in e.attributes: - additional_args["width"] = int(e.attributes["file-width"]) - if "file-height" in e.attributes: - additional_args["height"] = int(e.attributes["file-height"]) - if "file-quality" in e.attributes: - additional_args["quality"] = int(e.attributes["file-quality"]) - if "file-dpi" in e.attributes: - additional_args["dpi"] = int(e.attributes["file-dpi"]) + additional_args = self.get_image_processor_args(e.attributes) # The directory of the current file, will also look for images there. source_dir = e.attributes["source_dir"] @@ -168,15 +159,18 @@ class HTMLGenerator(OutputGenerator): # This is inspired by @vojta001's blogPhoto shortcode he made for # patek.cz: # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html - width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.public_dir]) + width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir]) sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality) for size in sizes: if width <= size[0] and height <= size[1]: srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w')) break quality = size[2] if ext == "jpeg" else None - srcset.append((f'{self.imageProcessor.web_path}/{self.imageProcessor.process_image(url, ext, self.imageProcessor.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w')) + cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=size[0], height=size[1], quality=quality) + self.imageProcessor.publish_image(cache_img) + srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{size[0]}w')) + self.imageProcessor.publish_image(url) url = self.imageProcessor.web_path + "/" + url attributes = self.common_attributes(e) diff --git a/src/formatitko/images.py b/src/formatitko/images.py index 6121bb5..421c9e9 100644 --- a/src/formatitko/images.py +++ b/src/formatitko/images.py @@ -1,24 +1,38 @@ -from typing import List, Union +from typing import List, Union, Tuple import os import shutil import subprocess from PIL import Image +class FileInWrongDirError(Exception): + pass + class ImageProcessor: - def __init__(self, public_dir: str, web_path: str, *lookup_dirs: List[str]): + def __init__(self, public_dir: str, web_path: str, cache_dir: str, *lookup_dirs: List[str]): self.public_dir = public_dir + self.cache_dir = cache_dir self.lookup_dirs = lookup_dirs self.web_path = web_path if web_path[-1] != "/" else web_path[:-1] if not os.path.exists(self.public_dir): os.mkdir(self.public_dir) + if not os.path.exists(self.cache_dir): + os.mkdir(self.cache_dir) - def process_image(self, input_filename: str, format: str, source_dir: str, relative: bool=True, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True) -> str: + def process_image(self, input_filename: str, format: str, source_dir: str, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: List[str]=[]) -> str: name = os.path.basename(input_filename) base, ext = os.path.splitext(name) ext = ext[1:] full_path = self.find_image(input_filename, [source_dir]) if full_path is None: - raise FileNotFoundError(f'Image {input_filename} not found.') + raise FileNotFoundError(f'Image {input_filename} not found in {self.lookup_dirs} or {source_dir}.') + + # Locate all dependencies + deps_full = [full_path] + for dep in deps: + dep_full_path = self.find_image(dep, [source_dir]) + if dep_full_path is None: + raise FileNotFoundError(f'Image dependency {dep} not found.') + deps_full.append(dep_full_path) # Generate filename from arguments suffix = "" @@ -29,10 +43,10 @@ class ImageProcessor: if quality is not None: suffix += f'_q{quality}' target_name = base+suffix+"."+format - target_path = self.public_dir + "/" + target_name + target_path = self.cache_dir + "/" + target_name - # Only regenerate if the file doesn't already exist. - if not os.path.isfile(target_path): + # Only regenerate if the file doesn't already exist and no dependencies are newer + if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full): # If the format is the same or it is just a different extension for # the same format, just copy it. @@ -44,7 +58,7 @@ class ImageProcessor: # Try to find the converted filename in lookup_dirs, if you find # it, don't convert, just copy. - elif self.find_image(target_name, [source_dir]): + elif self.find_image(target_name, [source_dir]) is not None and not self.is_outdated(self.find_image(target_name, [source_dir]), deps): shutil.copyfile(self.find_image(target_name, [source_dir]), target_path) # Convert SVGs using inkscape @@ -63,10 +77,37 @@ class ImageProcessor: if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0: raise Exception(f"Could not convert '{full_path}' to '{format}'") + return target_name + + def is_outdated(self, target: str, deps: List[str]): + target_timestamp = os.path.getmtime(target) + for dep in deps: + dep_timestamp = os.path.getmtime(dep) + if dep_timestamp > target_timestamp: + return True + return False + + def publish_image(self, target_name, relative: bool=True) -> str: + import sys + cache_path = self.cache_dir + "/" + target_name + if not os.path.isfile(cache_path): + raise FileNotFoundError(f'Image {target_name} not cached') + target_path = self.public_dir + "/" + target_name + try: + if os.path.exists(target_path): + if os.path.getmtime(cache_path) > os.path.getmtime(target_path): + os.remove(target_path) + os.link(cache_path, target_path) + else: + os.link(cache_path, target_path) + except OSError as e: + if e.errno == 18: # Invalid cross-device link: cache and public dirs are on different devices, don't hardlink, copy + shutil.copyfile(cache_path, target_path) + else: + raise e return target_name if relative else target_path - - def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> (int, int): + def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> Tuple[int, int]: full_path = self.find_image(input_filename, additional_dirs) if full_path is None: raise FileNotFoundError(f'Image {input_filename} not found.') diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 7f29779..562ece1 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -384,3 +384,19 @@ class OutputGenerator: self.writeln(self.stag(tag, attributes)) self.writeraw(e.text) self.writeln(self.etag(tag)) + + def get_image_processor_args(self, attributes:Dict[str,str]) -> Dict: + # Attributes → image processor args + additional_args = {} + if "file-width" in attributes: + additional_args["width"] = int(attributes["file-width"]) + if "file-height" in attributes: + additional_args["height"] = int(attributes["file-height"]) + if "file-quality" in attributes: + additional_args["quality"] = int(attributes["file-quality"]) + if "file-dpi" in attributes: + additional_args["dpi"] = int(attributes["file-dpi"]) + if "file-deps" in attributes: + additional_args["deps"] = attributes["file-deps"].split(",") + + return additional_args diff --git a/src/formatitko/tex.py b/src/formatitko/tex.py index 3d47568..5352fd3 100644 --- a/src/formatitko/tex.py +++ b/src/formatitko/tex.py @@ -73,6 +73,7 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0 if isinstance(e, Image): url = e.url + # TODO: This should use OutputGenerator's get_image_processor_args # Attributes → image processor args additional_args = {} if "file-width" in e.attributes: @@ -93,18 +94,19 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0 # Conversions between various formats. if ext in ["pdf", "png", "jpeg"]: # Even supported elements have to be 'converted' because the - # processing contains finding and moving them to the output + # processing contains finding and moving them to the cache # directory. - url = i.process_image(url, ext, source_dir, relative=False, **additional_args) + url = i.process_image(url, ext, source_dir, **additional_args) elif ext in ["svg"]: - url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args) + url = i.process_image(url, "pdf", source_dir, **additional_args) elif ext in ["epdf"]: - url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args) + url = i.process_image(url, "pdf", source_dir, **additional_args) elif ext in ["jpg"]: - url = i.process_image(url, "jpeg", source_dir, relative=False, **additional_args) + url = i.process_image(url, "jpeg", source_dir, **additional_args) else: - url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args) + url = i.process_image(url, "pdf", source_dir, **additional_args) + url = i.find_image(url, [i.cache_dir]) width = "" if "width" in e.attributes: width = e.attributes["width"]