Částečně předělán image processing, resolved #4, #6.

This commit is contained in:
Jan Černohorský 2023-07-19 19:16:14 +02:00
parent 6606c72a55
commit 166251ad71
5 changed files with 85 additions and 31 deletions

View file

@ -20,7 +20,8 @@ def main():
# Initialize command line arguments # Initialize command line arguments
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. Always looks for images in the same folder as the markdown file.", nargs="+", default=[]) parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. Always looks for images in the same folder as the markdown file.", nargs="+", default=[])
parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", default="public") parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will overwrite images, whose dependencies are newer.", default="public")
parser.add_argument("-c", "--img-cache-dir", help="Directory to cache processed images and intermediate products. The program will overwrite files, whose dependencies are newer.", default="cache")
parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/") parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/")
parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", default="output.html") parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", default="output.html")
parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", default="output.tex") parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", default="output.tex")
@ -49,14 +50,14 @@ def main():
doc.content = [Group(*doc.content, metadata={"language":language})] doc.content = [Group(*doc.content, metadata={"language":language})]
# Initialize the image processor (this just keeps some basic state) # Initialize the image processor (this just keeps some basic state)
imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, *args.img_lookup_dirs) imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs)
# Initialize KaTeX client (this runs the node app and connects to a unix socket) # Initialize KaTeX client (this runs the node app and connects to a unix socket)
with KatexClient() as katexClient: with KatexClient() as katexClient:
# Generate HTML and TeX out of the transformed document # Generate HTML and TeX out of the transformed document
#open(args.output_html, "w").write(html(doc, katexClient, imageProcessor)) #open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
#open(args.output_tex, "w").write(tex(doc, imageProcessor)) #open(args.output_tex, "w").write(tex(doc, imageProcessor))
OutputGenerator(sys.stdout).generate(doc) HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc)
if args.debug: if args.debug:
print(show(doc)) print(show(doc))

View file

@ -127,16 +127,7 @@ class HTMLGenerator(OutputGenerator):
def generate_Image(self, e: Image): def generate_Image(self, e: Image):
url = e.url url = e.url
# Attributes → image processor args additional_args = self.get_image_processor_args(e.attributes)
additional_args = {}
if "file-width" in e.attributes:
additional_args["width"] = int(e.attributes["file-width"])
if "file-height" in e.attributes:
additional_args["height"] = int(e.attributes["file-height"])
if "file-quality" in e.attributes:
additional_args["quality"] = int(e.attributes["file-quality"])
if "file-dpi" in e.attributes:
additional_args["dpi"] = int(e.attributes["file-dpi"])
# The directory of the current file, will also look for images there. # The directory of the current file, will also look for images there.
source_dir = e.attributes["source_dir"] source_dir = e.attributes["source_dir"]
@ -168,15 +159,18 @@ class HTMLGenerator(OutputGenerator):
# This is inspired by @vojta001's blogPhoto shortcode he made for # This is inspired by @vojta001's blogPhoto shortcode he made for
# patek.cz: # patek.cz:
# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.public_dir]) width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir])
sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality) sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality)
for size in sizes: for size in sizes:
if width <= size[0] and height <= size[1]: if width <= size[0] and height <= size[1]:
srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w')) srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w'))
break break
quality = size[2] if ext == "jpeg" else None quality = size[2] if ext == "jpeg" else None
srcset.append((f'{self.imageProcessor.web_path}/{self.imageProcessor.process_image(url, ext, self.imageProcessor.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w')) cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=size[0], height=size[1], quality=quality)
self.imageProcessor.publish_image(cache_img)
srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{size[0]}w'))
self.imageProcessor.publish_image(url)
url = self.imageProcessor.web_path + "/" + url url = self.imageProcessor.web_path + "/" + url
attributes = self.common_attributes(e) attributes = self.common_attributes(e)

View file

@ -1,24 +1,38 @@
from typing import List, Union from typing import List, Union, Tuple
import os import os
import shutil import shutil
import subprocess import subprocess
from PIL import Image from PIL import Image
class FileInWrongDirError(Exception):
    """Exception type declared next to ImageProcessor.

    NOTE(review): nothing in the visible code raises or catches this error.
    Judging by the name it presumably signals a file found outside the
    directory it was expected in -- confirm against the callers.
    """
    pass
class ImageProcessor: class ImageProcessor:
def __init__(self, public_dir: str, web_path: str, *lookup_dirs: List[str]): def __init__(self, public_dir: str, web_path: str, cache_dir: str, *lookup_dirs: List[str]):
self.public_dir = public_dir self.public_dir = public_dir
self.cache_dir = cache_dir
self.lookup_dirs = lookup_dirs self.lookup_dirs = lookup_dirs
self.web_path = web_path if web_path[-1] != "/" else web_path[:-1] self.web_path = web_path if web_path[-1] != "/" else web_path[:-1]
if not os.path.exists(self.public_dir): if not os.path.exists(self.public_dir):
os.mkdir(self.public_dir) os.mkdir(self.public_dir)
if not os.path.exists(self.cache_dir):
os.mkdir(self.cache_dir)
def process_image(self, input_filename: str, format: str, source_dir: str, relative: bool=True, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True) -> str: def process_image(self, input_filename: str, format: str, source_dir: str, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: List[str]=[]) -> str:
name = os.path.basename(input_filename) name = os.path.basename(input_filename)
base, ext = os.path.splitext(name) base, ext = os.path.splitext(name)
ext = ext[1:] ext = ext[1:]
full_path = self.find_image(input_filename, [source_dir]) full_path = self.find_image(input_filename, [source_dir])
if full_path is None: if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.') raise FileNotFoundError(f'Image {input_filename} not found in {self.lookup_dirs} or {source_dir}.')
# Locate all dependencies
deps_full = [full_path]
for dep in deps:
dep_full_path = self.find_image(dep, [source_dir])
if dep_full_path is None:
raise FileNotFoundError(f'Image dependency {dep} not found.')
deps_full.append(dep_full_path)
# Generate filename from arguments # Generate filename from arguments
suffix = "" suffix = ""
@ -29,10 +43,10 @@ class ImageProcessor:
if quality is not None: if quality is not None:
suffix += f'_q{quality}' suffix += f'_q{quality}'
target_name = base+suffix+"."+format target_name = base+suffix+"."+format
target_path = self.public_dir + "/" + target_name target_path = self.cache_dir + "/" + target_name
# Only regenerate if the file doesn't already exist. # Only regenerate if the file doesn't already exist and no dependencies are newer
if not os.path.isfile(target_path): if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full):
# If the format is the same or it is just a different extension for # If the format is the same or it is just a different extension for
# the same format, just copy it. # the same format, just copy it.
@ -44,7 +58,7 @@ class ImageProcessor:
# Try to find the converted filename in lookup_dirs, if you find # Try to find the converted filename in lookup_dirs, if you find
# it, don't convert, just copy. # it, don't convert, just copy.
elif self.find_image(target_name, [source_dir]): elif self.find_image(target_name, [source_dir]) is not None and not self.is_outdated(self.find_image(target_name, [source_dir]), deps):
shutil.copyfile(self.find_image(target_name, [source_dir]), target_path) shutil.copyfile(self.find_image(target_name, [source_dir]), target_path)
# Convert SVGs using inkscape # Convert SVGs using inkscape
@ -63,10 +77,37 @@ class ImageProcessor:
if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0: if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0:
raise Exception(f"Could not convert '{full_path}' to '{format}'") raise Exception(f"Could not convert '{full_path}' to '{format}'")
return target_name
def is_outdated(self, target: str, deps: List[str]):
    """Tell whether ``target`` is older than at least one of its dependencies.

    Modification times are read with ``os.path.getmtime``. The result is
    True as soon as one dependency has a strictly later mtime than
    ``target``; it is False when none does, including when ``deps`` is
    empty. ``os.path.getmtime`` raises ``OSError`` for a missing path.
    """
    built_at = os.path.getmtime(target)
    # The generator keeps the lookup lazy: dependencies after the first
    # newer one are never stat-ed.
    return any(os.path.getmtime(path) > built_at for path in deps)
def publish_image(self, target_name, relative: bool=True) -> str:
    """Make a cached image available in the public directory.

    The file ``<cache_dir>/<target_name>`` is hard-linked to
    ``<public_dir>/<target_name>``. An already published file is replaced
    only when the cached copy has a strictly newer modification time. If
    hard-linking fails because the two directories are on different
    devices, the file is copied instead.

    Args:
        target_name: File name of the image inside the cache directory.
        relative: When True, return ``target_name``; otherwise return the
            path of the published file.

    Returns:
        ``target_name`` or the published path, depending on ``relative``.

    Raises:
        FileNotFoundError: If the image is not present in the cache.
        OSError: Any linking/removal error other than a cross-device link.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import errno

    cache_path = self.cache_dir + "/" + target_name
    if not os.path.isfile(cache_path):
        raise FileNotFoundError(f'Image {target_name} not cached')

    target_path = self.public_dir + "/" + target_name
    try:
        if os.path.exists(target_path):
            # Only refresh the published file when the cache is newer.
            if os.path.getmtime(cache_path) > os.path.getmtime(target_path):
                os.remove(target_path)
                os.link(cache_path, target_path)
        else:
            os.link(cache_path, target_path)
    except OSError as e:
        # EXDEV: cache and public dirs live on different devices, so a hard
        # link is impossible -- fall back to copying. Anything else is a
        # genuine failure and is re-raised with its original traceback.
        if e.errno != errno.EXDEV:
            raise
        shutil.copyfile(cache_path, target_path)
    return target_name if relative else target_path
def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> Tuple[int, int]:
def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> (int, int):
full_path = self.find_image(input_filename, additional_dirs) full_path = self.find_image(input_filename, additional_dirs)
if full_path is None: if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.') raise FileNotFoundError(f'Image {input_filename} not found.')

View file

@ -384,3 +384,19 @@ class OutputGenerator:
self.writeln(self.stag(tag, attributes)) self.writeln(self.stag(tag, attributes))
self.writeraw(e.text) self.writeraw(e.text)
self.writeln(self.etag(tag)) self.writeln(self.etag(tag))
def get_image_processor_args(self, attributes:Dict[str,str]) -> Dict:
    """Build image processor keyword arguments from ``file-*`` attributes.

    ``file-width``, ``file-height``, ``file-quality`` and ``file-dpi`` are
    converted with ``int`` and stored under ``width``, ``height``,
    ``quality`` and ``dpi``. ``file-deps`` is split on commas and stored
    under ``deps``. Attributes that are absent produce no entry.
    """
    # Integer-valued options as (attribute name, keyword name) pairs, kept
    # in the order in which they are converted.
    int_options = (
        ("file-width", "width"),
        ("file-height", "height"),
        ("file-quality", "quality"),
        ("file-dpi", "dpi"),
    )
    additional_args = {
        keyword: int(attributes[attr])
        for attr, keyword in int_options
        if attr in attributes
    }
    if "file-deps" in attributes:
        additional_args["deps"] = attributes["file-deps"].split(",")
    return additional_args

View file

@ -73,6 +73,7 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0
if isinstance(e, Image): if isinstance(e, Image):
url = e.url url = e.url
# TODO: This should use OutputGenerator's get_image_processor_args
# Attributes → image processor args # Attributes → image processor args
additional_args = {} additional_args = {}
if "file-width" in e.attributes: if "file-width" in e.attributes:
@ -93,18 +94,19 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0
# Conversions between various formats. # Conversions between various formats.
if ext in ["pdf", "png", "jpeg"]: if ext in ["pdf", "png", "jpeg"]:
# Even supported elements have to be 'converted' because the # Even supported elements have to be 'converted' because the
# processing contains finding and moving them to the output # processing contains finding and moving them to the cache
# directory. # directory.
url = i.process_image(url, ext, source_dir, relative=False, **additional_args) url = i.process_image(url, ext, source_dir, **additional_args)
elif ext in ["svg"]: elif ext in ["svg"]:
url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args) url = i.process_image(url, "pdf", source_dir, **additional_args)
elif ext in ["epdf"]: elif ext in ["epdf"]:
url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args) url = i.process_image(url, "pdf", source_dir, **additional_args)
elif ext in ["jpg"]: elif ext in ["jpg"]:
url = i.process_image(url, "jpeg", source_dir, relative=False, **additional_args) url = i.process_image(url, "jpeg", source_dir, **additional_args)
else: else:
url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args) url = i.process_image(url, "pdf", source_dir, **additional_args)
url = i.find_image(url, [i.cache_dir])
width = "" width = ""
if "width" in e.attributes: if "width" in e.attributes:
width = e.attributes["width"] width = e.attributes["width"]