Improved image processing

Images are now looked up relative to the directory of the Markdown file that references them. Srcsets are generated for web images for faster loading.
2023-02-06 16:56:52 +01:00 · 2023-02-06 16:56:52 +01:00 · fbf491575f
commit fbf491575f
parent 303dcfaa1f
10 changed files with 82 additions and 43 deletions
--- a/context.py
+++ b/context.py
@ -1,5 +1,6 @@

 from panflute import Doc
+import os

 class Context:
 	def __init__(self, doc: Doc, path: str, parent: 'Context'=None):
@ -7,6 +8,8 @@ class Context:
 		self._commands = {}
 		self.doc = doc
 		self.path = path
+		self.dir = os.path.dirname(path) if os.path.dirname(path) != "" else "."
+		self.filename = os.path.basename(path)
 		if self.get_metadata("flags", immediate=True) is None:
 			self.set_metadata("flags", {})

--- a/formatitko.py
+++ b/formatitko.py
@ -19,8 +19,9 @@ from images import ImageProcessor
 from mj_show import show

 parser = argparse.ArgumentParser()
-parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. By default contains the directory of the MarkDown file.", nargs="+", default=[])
+parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. By default contains the directory of each MarkDown file.", nargs="+", default=[])
 parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", nargs=1, default="public")
+parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", nargs=1, default="/")
 parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", nargs=1, default="output.html")
 parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", nargs=1, default="output.tex")
 parser.add_argument("input_filename", help="The MarkDown file to process.")
@ -29,15 +30,14 @@ args = parser.parse_args()
 doc = import_md(open(args.input_filename, "r").read())

 language = doc.get_metadata("language", None, True)
-context = Context(doc, sys.argv[1])
+context = Context(doc, args.input_filename)

 doc = doc.walk(transform, context)

 doc.content = [Group(*doc.content, metadata={"language":language})]

 katexClient = KatexClient()
-doc_dir = os.path.dirname(args.input_filename) if os.path.dirname(args.input_filename) != "" else "."
-imageProcessor = ImageProcessor(args.img_public_dir, doc_dir, *args.img_lookup_dirs)
+imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, *args.img_lookup_dirs)

 open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
 open(args.output_tex, "w").write(tex(doc, imageProcessor))
--- a/html.py
+++ b/html.py
@ -104,17 +104,41 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind

 	if isinstance(e, Image):
 		url = e.url
+		source_dir = e.attributes["source_dir"]
 		_, ext = os.path.splitext(url)
 		ext = ext[1:]
 		if ext in ["svg", "png", "jpeg", "gif"]:
-			url = i.process_image(url, ext)
+			url = i.process_image(url, ext, source_dir)
 		elif ext in ["pdf", "epdf"]:
-			url = i.process_image(url, "png", dpi=300)
+			url = i.process_image(url, "png", source_dir, dpi=300)
 		elif ext in ["jpg"]:
-			url = i.process_image(url, "jpeg")
+			url = i.process_image(url, "jpeg", source_dir)
 		else:
-			url = i.process_image(url, ".png")
-		return f'<img src="{url}" {"style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}">'
+			url = i.process_image(url, "png", source_dir)
+		
+		_, ext = os.path.splitext(url)
+		ext = ext[1:]
+		srcset = []
+		if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'):
+			# This is inspired by @vojta001's blogPhoto shortcode he made for
+			# patek.cz:
+			# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
+			width, height = i.get_image_size(url, [source_dir, i.public_dir])
+				sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (width, height, quality)
+			for size in sizes:
+				if width <= size[0] and height <= size[1]:
+					srcset.append((f'{i.web_path}/{url}', f'{width}w'))
+					break
+				quality = size[2] if ext == "jpeg" else None
+				srcset.append((f'{i.web_path}/{i.process_image(url, ext, source_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))
+
+		url = i.web_path + "/" + url
+		
+		attributes = f'{" style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}"'
+		if len(srcset) != 0:
+			return f'<a href="{url}"><img src="{srcset[-1][0]}" srcset="{", ".join([" ".join(src) for src in srcset])}"{attributes}></a>'
+		else:
+			return f'<img src="{url}"{attributes}>'

 	if isinstance(e, Header):
 		tag = "h"+str(e.level)
--- a/images.py
+++ b/images.py
@ -4,29 +4,28 @@ import shutil
 import subprocess

 class ImageProcessor:
-	def __init__(self, public_dir: str, *lookup_dirs: List[str]):
+	def __init__(self, public_dir: str, web_path: str, *lookup_dirs: List[str]):
 		self.public_dir = public_dir
 		self.lookup_dirs = lookup_dirs
+		self.web_path = web_path if web_path[-1] != "/" else web_path[:-1]
 		if not os.path.exists(self.public_dir):
 			os.mkdir(self.public_dir)

-	def process_image(self, input_filename: str, format: str, relative=True, width: int=None, height:int=None, quality: int=None, dpi: int=None) -> str:
+	def process_image(self, input_filename: str, format: str, source_dir: str, relative: bool=True, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True) -> str:
 		name = os.path.basename(input_filename)
 		base, ext = os.path.splitext(name)
 		ext = ext[1:]
-		full_path = self.find_image(input_filename)
+		full_path = self.find_image(input_filename, [source_dir])
 		if full_path is None:
 			raise FileNotFoundError(f'Image {input_filename} not found.')
 		
 		suffix = ""
 		geometry = None
 		if width is not None or height is not None:
-			geometry = f'{width if width is not None else ""}x{height if height is not None else ""}'
+			geometry = f'{width if width is not None else ""}x{height if height is not None else ""}{"" if fit else "!"}'
 			suffix += "_"+geometry
 		if quality is not None:
 			suffix += f'_q{quality}'
-		if quality is not None:
-			suffix += f'_d{dpi}'
 		target_name = base+suffix+"."+format
 		target_path = self.public_dir + "/" + target_name
 		
@ -37,8 +36,8 @@ class ImageProcessor:
 			 and width is None and height is None and quality is None and dpi is None):
 				shutil.copyfile(full_path, target_path)

-			elif self.find_image(target_name):
-				shutil.copyfile(self.find_image(target_name), target_path)
+			elif self.find_image(target_name, [source_dir]):
+				shutil.copyfile(self.find_image(target_name, [source_dir]), target_path)

 			elif ext == "svg":
 				width_arg = ['--export-width', str(width)] if width is not None else []
@ -51,15 +50,20 @@ class ImageProcessor:
 				resize_arg = ['-resize', str(geometry)] if geometry is not None else []
 				density_arg = ['-density', str(dpi)] if dpi is not None else []
 				quality_arg = ['-quality', str(quality)] if quality is not None else []
-				if subprocess.run(['convert', full_path, *resize_arg, *density_arg, *quality_arg, target_path]).returncode != 0:
+				if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0:
 					raise Exception(f"Could not convert '{full_path}' to '{format}'")

 		return target_name if relative else target_path


-			
+	def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> (int, int):
+		full_path = self.find_image(input_filename, additional_dirs)
+		if full_path is None:
+			raise FileNotFoundError(f'Image {input_filename} not found.')
+		return (int(x) for x in subprocess.run(['convert', full_path, '-print', '%w %h\\n', '/dev/null'], capture_output=True).stdout.split(b" "))

-	def find_image(self, input_filename) -> str:
-		for dir in self.lookup_dirs:
+
+	def find_image(self, input_filename: str, additional_dirs: List[str]=[]) -> str:
+		for dir in [*self.lookup_dirs, *additional_dirs]:
 			if os.path.isfile(dir + "/" + input_filename):
 				return dir + "/" + input_filename
--- a/test/test-files/logo.jpg
+++ b/test/test-files/logo.jpg
--- a/test/test-files/test-import.md
+++ b/test/test-files/test-import.md
--- a/test/test-files/test-partial.md
+++ b/test/test-files/test-partial.md
@ -51,3 +51,17 @@ $$
 \def\eqalign#1{NO, just, nooooo}
 \eqalign{}
 $$
+
+
+![This is a figure, go figure...](logo.svg){width=25%}
+
+![This is a figure, go figure...](logo.pdf){width=50%}
+
+![This is a figure, go figure...](logo.jpg){width=50%}
+
+![This is a figure, go figure...](logo1.png){width=10em}
+
+![Fakt epesní reproduktor](reproduktor.jpeg){width=10em}
+
+![Fakt epesní reproduktor](reproduktor.png){width=10em}
+
--- a/test/test.md
+++ b/test/test.md
@ -4,7 +4,7 @@ subtitle: 'A subtitle'
 are_we_there_yet: False
 language: "en"
 ---
-[#test-import.md]{}
+[#test-files/test-import.md]{}

 # Hello world!

@ -15,7 +15,7 @@ This is *very **strongly** emphasised*
 Příliš žluťoučký kůň pěl dábelské ódy. *Příliš žluťoučký kůň pěl dábelské ódy.* **Příliš žluťoučký kůň pěl dábelské ódy.** ***Příliš žluťoučký kůň pěl dábelské ódy.***


-:::{partial=test-partial.md}
+:::{partial=test-files/test-partial.md}
 :::

 :::{if=cat}
@ -47,18 +47,6 @@ This should only be shown to cats the second time

 # [$are_we_there_yet]{}

-![This is a figure, go figure...](logo.svg){width=50%}
-
-![This is a figure, go figure...](logo.pdf){width=50%}
-
-![This is a figure, go figure...](logo.jpg){width=50%}
-
-![This is a figure, go figure...](logo.png){width=10em}
-
-![Fakt epesní reproduktor](reproduktor.jpeg){width=10em}
-
-![Fakt epesní reproduktor](reproduktor.png){width=10em}
-
 ```python {.run}
 ctx.set_metadata("language", "cs")
 ```
@ -105,7 +93,7 @@ $$
 }
 $$

-:::{partial=test-partial.md}
+:::{partial=test-files/test-partial.md}
 :::

 ---
--- a/tex.py
+++ b/tex.py
@ -109,18 +109,19 @@ def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t"
 	
 	if isinstance(e, Image):
 		url = e.url
+		source_dir = e.attributes["source_dir"]
 		_, ext = os.path.splitext(url)
 		ext = ext[1:]
 		if ext in ["pdf", "png", "jpeg"]:
-			url = i.process_image(url, ext, relative=False)
+			url = i.process_image(url, ext, source_dir, relative=False)
 		elif ext in ["svg"]:
-			url = i.process_image(url, "pdf", relative=False)
+			url = i.process_image(url, "pdf", source_dir, relative=False)
 		elif ext in ["epdf"]:
-			url = i.process_image(url, "pdf", relative=False)
+			url = i.process_image(url, "pdf", source_dir, relative=False)
 		elif ext in ["jpg"]:
-			url = i.process_image(url, "jpeg", relative=False)
+			url = i.process_image(url, "jpeg", source_dir, relative=False)
 		else:
-			url = i.process_image(url, "pdf", relative=False)
+			url = i.process_image(url, "pdf", source_dir, relative=False)
 		width = ""
 		if "width" in e.attributes:
 			width = e.attributes["width"]
--- a/transform.py
+++ b/transform.py
@ -40,7 +40,7 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
 		# This is for including content from files with their own flags and
 		# commands without affecting the state of the current document.
 		if (isinstance(e, Div)) and "partial" in e.attributes:
-			includedDoc = import_md(open(e.attributes["partial"], "r").read())
+			includedDoc = import_md(open(c.dir + "/" + e.attributes["partial"], "r").read())
 			nContext = Context(includedDoc, e.attributes["partial"], c)
 			language = includedDoc.get_metadata("language")
 			includedDoc = includedDoc.walk(transform, nContext)
@ -56,6 +56,11 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
 		}
 		e = FQuoted(*e.content, quote_type=e.quote_type, style=quote_styles[c.get_metadata("language")])

+	if isinstance(e, Image):
+		e.attributes["source_dir"] = c.dir
+		if not "no-srcset" in e.attributes:
+			e.attributes["no-srcset"] = c.get_metadata("no-srcset") if c.get_metadata("no-srcset") is not None else False
+
 	# Execute python code inside source code block
 	if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and "run" in e.classes:
 		e = Div(*executeCommand(e.text, None, c))
@ -84,7 +89,7 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
 		# This is the exact opposite of include. We take the commands
 		# and flags but drop the content.
 		elif re.match(r"^#.+$", e.content[0].text):
-			importedDoc = import_md(open(e.content[0].text[1:], "r").read())
+			importedDoc = import_md(open(c.dir + "/" + e.content[0].text[1:], "r").read())
 			importedDoc.walk(transform, c)
 			return nullify(e)