Částečně předělán image processing, resolved #4, resolved #6.

2023-07-19 19:16:14 +02:00 · 2023-07-19 19:16:14 +02:00 · 6dd2cbc995
commit 6dd2cbc995
parent 6606c72a55
5 changed files with 85 additions and 31 deletions
--- a/src/formatitko/formatitko.py
+++ b/src/formatitko/formatitko.py
@@ -20,7 +20,8 @@ def main():
 	# Initialize command line arguments
 	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 	parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. Always looks for images in the same folder as the markdown file.", nargs="+", default=[])
-	parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", default="public")
+	parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will overwrite images, whose dependencies are newer.", default="public")
+	parser.add_argument("-c", "--img-cache-dir", help="Directory to cache processed images and intermediate products. The program will overwrite files, whose dependencies are newer.", default="cache")
 	parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/")
 	parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", default="output.html")
 	parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", default="output.tex")
@@ -49,14 +50,14 @@ def main():
 	doc.content = [Group(*doc.content, metadata={"language":language})]

 	# Initialize the image processor (this just keeps some basic state)
-	imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, *args.img_lookup_dirs)
+	imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs)

 	# Initialize KaTeX client (this runs the node app and connects to a unix socket)
 	with KatexClient() as katexClient:
 		# Generate HTML and TeX out of the transformed document
 		#open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
 		#open(args.output_tex, "w").write(tex(doc, imageProcessor))
-		OutputGenerator(sys.stdout).generate(doc)
+		HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc)

 	if args.debug:
 		print(show(doc))
--- a/src/formatitko/html_generator.py
+++ b/src/formatitko/html_generator.py
@@ -127,16 +127,7 @@ class HTMLGenerator(OutputGenerator):
 	def generate_Image(self, e: Image):
 		url = e.url

-		# Attributes → image processor args
-		additional_args = {}
-		if "file-width" in e.attributes:
-			additional_args["width"] = int(e.attributes["file-width"])
-		if "file-height" in e.attributes:
-			additional_args["height"] = int(e.attributes["file-height"])
-		if "file-quality" in e.attributes:
-			additional_args["quality"] = int(e.attributes["file-quality"])
-		if "file-dpi" in e.attributes:
-			additional_args["dpi"] = int(e.attributes["file-dpi"])
+		additional_args = self.get_image_processor_args(e.attributes)

 		# The directory of the current file, will also look for images there.
 		source_dir = e.attributes["source_dir"]
@@ -168,15 +159,18 @@ class HTMLGenerator(OutputGenerator):
 			# This is inspired by @vojta001's blogPhoto shortcode he made for
 			# patek.cz:
 			# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
-			width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.public_dir])
+			width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir])
 			sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality)
 			for size in sizes:
 				if width <= size[0] and height <= size[1]:
 					srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w'))
 					break
 				quality = size[2] if ext == "jpeg" else None
-				srcset.append((f'{self.imageProcessor.web_path}/{self.imageProcessor.process_image(url, ext, self.imageProcessor.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))
+				cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=size[0], height=size[1], quality=quality)
+				self.imageProcessor.publish_image(cache_img)
+				srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{size[0]}w'))

+		self.imageProcessor.publish_image(url)
 		url = self.imageProcessor.web_path + "/" + url
 		
 		attributes = self.common_attributes(e)
--- a/src/formatitko/images.py
+++ b/src/formatitko/images.py
@@ -1,24 +1,38 @@
-from typing import List, Union
+from typing import List, Union, Tuple
 import os
 import shutil
 import subprocess
 from PIL import Image

+class FileInWrongDirError(Exception):
+	pass
+
 class ImageProcessor:
-	def __init__(self, public_dir: str, web_path: str, *lookup_dirs: List[str]):
+	def __init__(self, public_dir: str, web_path: str, cache_dir: str, *lookup_dirs: List[str]):
 		self.public_dir = public_dir
+		self.cache_dir = cache_dir
 		self.lookup_dirs = lookup_dirs
 		self.web_path = web_path if web_path[-1] != "/" else web_path[:-1]
 		if not os.path.exists(self.public_dir):
 			os.mkdir(self.public_dir)
+		if not os.path.exists(self.cache_dir):
+			os.mkdir(self.cache_dir)

-	def process_image(self, input_filename: str, format: str, source_dir: str, relative: bool=True, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True) -> str:
+	def process_image(self, input_filename: str, format: str, source_dir: str, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: List[str]=[]) -> str:
 		name = os.path.basename(input_filename)
 		base, ext = os.path.splitext(name)
 		ext = ext[1:]
 		full_path = self.find_image(input_filename, [source_dir])
 		if full_path is None:
-			raise FileNotFoundError(f'Image {input_filename} not found.')
+			raise FileNotFoundError(f'Image {input_filename} not found in {self.lookup_dirs} or {source_dir}.')
+
+		# Locate all dependencies
+		deps_full = [full_path]
+		for dep in deps:
+			dep_full_path = self.find_image(dep, [source_dir])
+			if dep_full_path is None:
+				raise FileNotFoundError(f'Image dependency {dep} not found.')
+			deps_full.append(dep_full_path)

 		# Generate filename from arguments
 		suffix = ""
@@ -29,10 +43,10 @@ class ImageProcessor:
 		if quality is not None:
 			suffix += f'_q{quality}'
 		target_name = base+suffix+"."+format
-		target_path = self.public_dir + "/" + target_name
+		target_path = self.cache_dir + "/" + target_name
 	
-		# Only regenerate if the file doesn't already exist.
-		if not os.path.isfile(target_path):
+		# Regenerate only if the file doesn't exist yet or any of its dependencies is newer
+		if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full):

 			# If the format is the same or it is just a different extension for
 			# the same format, just copy it.
@@ -44,7 +58,7 @@ class ImageProcessor:

 			# Try to find the converted filename in lookup_dirs, if you find
 			# it, don't convert, just copy.
-			elif self.find_image(target_name, [source_dir]):
+			elif self.find_image(target_name, [source_dir]) is not None and not self.is_outdated(self.find_image(target_name, [source_dir]), deps):
 				shutil.copyfile(self.find_image(target_name, [source_dir]), target_path)

 			# Convert SVGs using inkscape
@@ -63,10 +77,37 @@ class ImageProcessor:
 				if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0:
 					raise Exception(f"Could not convert '{full_path}' to '{format}'")

+		return target_name
+
+	def is_outdated(self, target: str, deps: List[str]):
+		target_timestamp = os.path.getmtime(target)
+		for dep in deps:
+			dep_timestamp = os.path.getmtime(dep)
+			if dep_timestamp > target_timestamp:
+				return True
+		return False
+
+	def publish_image(self, target_name, relative: bool=True) -> str:
+		import sys
+		cache_path = self.cache_dir + "/" + target_name
+		if not os.path.isfile(cache_path):
+			raise FileNotFoundError(f'Image {target_name} not cached')
+		target_path = self.public_dir + "/" + target_name
+		try:
+			if os.path.exists(target_path):
+				if os.path.getmtime(cache_path) > os.path.getmtime(target_path):
+					os.remove(target_path)
+					os.link(cache_path, target_path)
+			else:
+				os.link(cache_path, target_path)
+		except OSError as e:
+			if e.errno == 18: # Invalid cross-device link: cache and public dirs are on different devices, don't hardlink, copy
+				shutil.copyfile(cache_path, target_path)
+			else:
+				raise e
 		return target_name if relative else target_path

-
-	def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> (int, int):
+	def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> Tuple[int, int]:
 		full_path = self.find_image(input_filename, additional_dirs)
 		if full_path is None:
 			raise FileNotFoundError(f'Image {input_filename} not found.')
--- a/src/formatitko/output_generator.py
+++ b/src/formatitko/output_generator.py
@@ -384,3 +384,19 @@ class OutputGenerator:
 		self.writeln(self.stag(tag, attributes))
 		self.writeraw(e.text)
 		self.writeln(self.etag(tag))
+
+	def get_image_processor_args(self, attributes:Dict[str,str]) -> Dict:
+		# Attributes → image processor args
+		additional_args = {}
+		if "file-width" in attributes:
+			additional_args["width"] = int(attributes["file-width"])
+		if "file-height" in attributes:
+			additional_args["height"] = int(attributes["file-height"])
+		if "file-quality" in attributes:
+			additional_args["quality"] = int(attributes["file-quality"])
+		if "file-dpi" in attributes:
+			additional_args["dpi"] = int(attributes["file-dpi"])
+		if "file-deps" in attributes:
+			additional_args["deps"] = attributes["file-deps"].split(",")
+		
+		return additional_args
--- a/src/formatitko/tex.py
+++ b/src/formatitko/tex.py
@@ -73,6 +73,7 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0
 	if isinstance(e, Image):
 		url = e.url

+		# TODO: This should use OutputGenerator's get_image_processor_args
 		# Attributes → image processor args
 		additional_args = {}
 		if "file-width" in e.attributes:
@@ -93,18 +94,19 @@ def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0
 		# Conversions between various formats.
 		if ext in ["pdf", "png", "jpeg"]:
 			# Even supported elements have to be 'converted' because the
-			# processing contains finding and moving them to the output
+			# processing contains finding and moving them to the cache
 			# directory.
-			url = i.process_image(url, ext, source_dir, relative=False, **additional_args)
+			url = i.process_image(url, ext, source_dir, **additional_args)
 		elif ext in ["svg"]:
-			url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
+			url = i.process_image(url, "pdf", source_dir, **additional_args)
 		elif ext in ["epdf"]:
-			url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
+			url = i.process_image(url, "pdf", source_dir, **additional_args)
 		elif ext in ["jpg"]:
-			url = i.process_image(url, "jpeg", source_dir, relative=False, **additional_args)
+			url = i.process_image(url, "jpeg", source_dir, **additional_args)
 		else:
-			url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
+			url = i.process_image(url, "pdf", source_dir, **additional_args)

+		url = i.find_image(url, [i.cache_dir])
 		width = ""
 		if "width" in e.attributes:
 			width = e.attributes["width"]