Částečně předělán image processing, resolved #4, #6.

This commit is contained in:
Jan Černohorský 2023-07-19 19:16:14 +02:00
parent 6606c72a55
commit 166251ad71
5 changed files with 85 additions and 31 deletions

View file

@ -20,7 +20,8 @@ def main():
# Initialize command line arguments
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. Always looks for images in the same folder as the markdown file.", nargs="+", default=[])
parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", default="public")
parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will overwrite images, whose dependencies are newer.", default="public")
parser.add_argument("-c", "--img-cache-dir", help="Directory to cache processed images and intermediate products. The program will overwrite files, whose dependencies are newer.", default="cache")
parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/")
parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", default="output.html")
parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", default="output.tex")
@ -49,14 +50,14 @@ def main():
doc.content = [Group(*doc.content, metadata={"language":language})]
# Initialize the image processor (this just keeps some basic state)
imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, *args.img_lookup_dirs)
imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs)
# Initialize KaTeX client (this runs the node app and connects to a unix socket)
with KatexClient() as katexClient:
# Generate HTML and TeX out of the transformed document
#open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
#open(args.output_tex, "w").write(tex(doc, imageProcessor))
OutputGenerator(sys.stdout).generate(doc)
HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc)
if args.debug:
print(show(doc))

View file

@ -127,16 +127,7 @@ class HTMLGenerator(OutputGenerator):
def generate_Image(self, e: Image):
url = e.url
# Attributes → image processor args
additional_args = {}
if "file-width" in e.attributes:
additional_args["width"] = int(e.attributes["file-width"])
if "file-height" in e.attributes:
additional_args["height"] = int(e.attributes["file-height"])
if "file-quality" in e.attributes:
additional_args["quality"] = int(e.attributes["file-quality"])
if "file-dpi" in e.attributes:
additional_args["dpi"] = int(e.attributes["file-dpi"])
additional_args = self.get_image_processor_args(e.attributes)
# The directory of the current file, will also look for images there.
source_dir = e.attributes["source_dir"]
@ -168,15 +159,18 @@ class HTMLGenerator(OutputGenerator):
# This is inspired by @vojta001's blogPhoto shortcode he made for
# patek.cz:
# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.public_dir])
width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir])
sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality)
for size in sizes:
if width <= size[0] and height <= size[1]:
srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w'))
break
quality = size[2] if ext == "jpeg" else None
srcset.append((f'{self.imageProcessor.web_path}/{self.imageProcessor.process_image(url, ext, self.imageProcessor.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))
cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=size[0], height=size[1], quality=quality)
self.imageProcessor.publish_image(cache_img)
srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{size[0]}w'))
self.imageProcessor.publish_image(url)
url = self.imageProcessor.web_path + "/" + url
attributes = self.common_attributes(e)

View file

@ -1,24 +1,38 @@
from typing import List, Union
from typing import List, Union, Tuple
import os
import shutil
import subprocess
from PIL import Image
class FileInWrongDirError(Exception):
    """Error type for a file located in the wrong directory.

    NOTE(review): the meaning is taken from the class name only; no raise
    site is visible in this excerpt -- confirm against the callers.
    """
class ImageProcessor:
def __init__(self, public_dir: str, web_path: str, *lookup_dirs: List[str]):
def __init__(self, public_dir: str, web_path: str, cache_dir: str, *lookup_dirs: str):
    """Store the processor configuration and make sure the output directories exist.

    Args:
        public_dir: Directory that receives the published images.
        web_path: Path under which the published images are served; a single
            trailing slash is removed so it can be joined with "/" later.
        cache_dir: Directory that holds processed images and intermediate
            products.
        *lookup_dirs: Additional directories to search for images.

    Raises:
        OSError: If one of the directories cannot be created.
    """
    self.public_dir = public_dir
    self.cache_dir = cache_dir
    self.lookup_dirs = lookup_dirs
    # endswith() also copes with an empty web_path, where web_path[-1] would
    # raise IndexError.
    self.web_path = web_path[:-1] if web_path.endswith("/") else web_path
    # makedirs creates missing parent directories too (os.mkdir would fail on
    # e.g. "build/public") and exist_ok avoids the check-then-create race.
    for directory in (self.public_dir, self.cache_dir):
        os.makedirs(directory, exist_ok=True)
def process_image(self, input_filename: str, format: str, source_dir: str, relative: bool=True, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True) -> str:
def process_image(self, input_filename: str, format: str, source_dir: str, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: List[str]=[]) -> str:
name = os.path.basename(input_filename)
base, ext = os.path.splitext(name)
ext = ext[1:]
full_path = self.find_image(input_filename, [source_dir])
if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.')
raise FileNotFoundError(f'Image {input_filename} not found in {self.lookup_dirs} or {source_dir}.')
# Locate all dependencies
deps_full = [full_path]
for dep in deps:
dep_full_path = self.find_image(dep, [source_dir])
if dep_full_path is None:
raise FileNotFoundError(f'Image dependency {dep} not found.')
deps_full.append(dep_full_path)
# Generate filename from arguments
suffix = ""
@ -29,10 +43,10 @@ class ImageProcessor:
if quality is not None:
suffix += f'_q{quality}'
target_name = base+suffix+"."+format
target_path = self.public_dir + "/" + target_name
target_path = self.cache_dir + "/" + target_name
# Only regenerate if the file doesn't already exist.
if not os.path.isfile(target_path):
# Regenerate if the file doesn't exist yet or if any of its dependencies is newer
if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full):
# If the format is the same or it is just a different extension for
# the same format, just copy it.
@ -44,7 +58,7 @@ class ImageProcessor:
# Try to find the converted filename in lookup_dirs, if you find
# it, don't convert, just copy.
elif self.find_image(target_name, [source_dir]):
elif self.find_image(target_name, [source_dir]) is not None and not self.is_outdated(self.find_image(target_name, [source_dir]), deps):
shutil.copyfile(self.find_image(target_name, [source_dir]), target_path)
# Convert SVGs using inkscape
@ -63,10 +77,37 @@ class ImageProcessor:
if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0:
raise Exception(f"Could not convert '{full_path}' to '{format}'")
return target_name
def is_outdated(self, target: str, deps: List[str]) -> bool:
    """Tell whether any dependency was modified after ``target``.

    Args:
        target: Path of the generated file; it has to exist.
        deps: Paths of the files ``target`` depends on.

    Returns:
        True if at least one dependency has a strictly newer modification
        time than ``target``; False otherwise, including for empty ``deps``.

    Raises:
        OSError: If ``os.path.getmtime`` cannot stat ``target`` or an
            inspected dependency.
    """
    target_timestamp = os.path.getmtime(target)
    # any() stops at the first newer dependency, so later ones are not stat'ed.
    return any(os.path.getmtime(dep) > target_timestamp for dep in deps)
def publish_image(self, target_name, relative: bool=True) -> str:
    """Make an already cached image available in the public directory.

    The file is hard-linked from ``self.cache_dir`` into ``self.public_dir``.
    A file that already exists in the public directory is replaced only when
    the cached one has a newer modification time. If hard-linking fails
    because the two directories are on different devices, the file is copied
    instead.

    Args:
        target_name: File name of the image inside the cache directory.
        relative: When True return ``target_name``, otherwise return the path
            of the file inside the public directory.

    Returns:
        ``target_name`` or the public path, depending on ``relative``.

    Raises:
        FileNotFoundError: If ``target_name`` is not a file in the cache
            directory.
        OSError: For any failure other than a cross-device link error.
    """
    # Imported locally so the block does not depend on the module's import list.
    import errno

    cache_path = self.cache_dir + "/" + target_name
    if not os.path.isfile(cache_path):
        raise FileNotFoundError(f'Image {target_name} not cached')

    target_path = self.public_dir + "/" + target_name
    try:
        already_published = os.path.exists(target_path)
        if already_published and os.path.getmtime(cache_path) <= os.path.getmtime(target_path):
            # The published file is up to date, leave it untouched.
            return target_name if relative else target_path
        if already_published:
            # os.link refuses to overwrite, so drop the stale file first.
            os.remove(target_path)
        os.link(cache_path, target_path)
    except OSError as e:
        # EXDEV: cache and public dirs are on different devices, so a hard
        # link is impossible; fall back to copying.
        if e.errno != errno.EXDEV:
            raise
        shutil.copyfile(cache_path, target_path)
    return target_name if relative else target_path
def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> (int, int):
def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> Tuple[int, int]:
full_path = self.find_image(input_filename, additional_dirs)
if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.')

View file

@ -384,3 +384,19 @@ class OutputGenerator:
self.writeln(self.stag(tag, attributes))
self.writeraw(e.text)
self.writeln(self.etag(tag))
def get_image_processor_args(self, attributes:Dict[str,str]) -> Dict:
    """Translate ``file-*`` element attributes into image processor keyword arguments.

    Args:
        attributes: Attributes of an image element. Only ``file-width``,
            ``file-height``, ``file-quality``, ``file-dpi`` and ``file-deps``
            are read; everything else is ignored.

    Returns:
        A dict with the keys ``width``, ``height``, ``quality`` and ``dpi``
        (as ints) and ``deps`` (a list of names), each present only when the
        matching attribute was given.

    Raises:
        ValueError: If one of the numeric attributes is not a valid integer.
    """
    # Attributes → image processor args
    numeric_attributes = (
        ("file-width", "width"),
        ("file-height", "height"),
        ("file-quality", "quality"),
        ("file-dpi", "dpi"),
    )
    additional_args = {}
    for attribute, argument in numeric_attributes:
        if attribute in attributes:
            additional_args[argument] = int(attributes[attribute])
    if "file-deps" in attributes:
        # Comma-separated list; surrounding whitespace and empty entries
        # (e.g. from "a.png, b.png" or a trailing comma) are dropped so they
        # do not end up as bogus dependency names.
        names = (dep.strip() for dep in attributes["file-deps"].split(","))
        additional_args["deps"] = [dep for dep in names if dep]
    return additional_args

View file

@ -73,6 +73,7 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0
if isinstance(e, Image):
url = e.url
# TODO: This should use OutputGenerator's get_image_processor_args
# Attributes → image processor args
additional_args = {}
if "file-width" in e.attributes:
@ -93,18 +94,19 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0
# Conversions between various formats.
if ext in ["pdf", "png", "jpeg"]:
# Even supported elements have to be 'converted' because the
# processing contains finding and moving them to the output
# processing contains finding and moving them to the cache
# directory.
url = i.process_image(url, ext, source_dir, relative=False, **additional_args)
url = i.process_image(url, ext, source_dir, **additional_args)
elif ext in ["svg"]:
url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
url = i.process_image(url, "pdf", source_dir, **additional_args)
elif ext in ["epdf"]:
url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
url = i.process_image(url, "pdf", source_dir, **additional_args)
elif ext in ["jpg"]:
url = i.process_image(url, "jpeg", source_dir, relative=False, **additional_args)
url = i.process_image(url, "jpeg", source_dir, **additional_args)
else:
url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
url = i.process_image(url, "pdf", source_dir, **additional_args)
url = i.find_image(url, [i.cache_dir])
width = ""
if "width" in e.attributes:
width = e.attributes["width"]