Browse Source

Improved image processing

Images are now looked up relative to the directory of the Markdown file that references them.
Srcsets are generated for web images for fast loading.
pull/28/head
Jan Černohorský 1 year ago
parent
commit
fbf491575f
  1. 3
      context.py
  2. 8
      formatitko.py
  3. 34
      html.py
  4. 28
      images.py
  5. 0
      test/test-files/logo.jpg
  6. 0
      test/test-files/test-import.md
  7. 14
      test/test-files/test-partial.md
  8. 18
      test/test.md
  9. 11
      tex.py
  10. 9
      transform.py

3
context.py

@ -1,5 +1,6 @@
from panflute import Doc
import os
class Context:
def __init__(self, doc: Doc, path: str, parent: 'Context'=None):
@ -7,6 +8,8 @@ class Context:
self._commands = {}
self.doc = doc
self.path = path
self.dir = os.path.dirname(path) if os.path.dirname(path) != "" else "."
self.filename = os.path.basename(path)
if self.get_metadata("flags", immediate=True) is None:
self.set_metadata("flags", {})

8
formatitko.py

@ -19,8 +19,9 @@ from images import ImageProcessor
from mj_show import show
parser = argparse.ArgumentParser()
parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. By default contains the directory of the MarkDown file.", nargs="+", default=[])
parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. By default contains the directory of each MarkDown file.", nargs="+", default=[])
parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", nargs=1, default="public")
parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", nargs=1, default="/")
parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", nargs=1, default="output.html")
parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", nargs=1, default="output.tex")
parser.add_argument("input_filename", help="The MarkDown file to process.")
@ -29,15 +30,14 @@ args = parser.parse_args()
doc = import_md(open(args.input_filename, "r").read())
language = doc.get_metadata("language", None, True)
context = Context(doc, sys.argv[1])
context = Context(doc, args.input_filename)
doc = doc.walk(transform, context)
doc.content = [Group(*doc.content, metadata={"language":language})]
katexClient = KatexClient()
doc_dir = os.path.dirname(args.input_filename) if os.path.dirname(args.input_filename) != "" else "."
imageProcessor = ImageProcessor(args.img_public_dir, doc_dir, *args.img_lookup_dirs)
imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, *args.img_lookup_dirs)
open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
open(args.output_tex, "w").write(tex(doc, imageProcessor))

34
html.py

@ -104,17 +104,41 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
if isinstance(e, Image):
url = e.url
source_dir = e.attributes["source_dir"]
_, ext = os.path.splitext(url)
ext = ext[1:]
if ext in ["svg", "png", "jpeg", "gif"]:
url = i.process_image(url, ext)
url = i.process_image(url, ext, source_dir)
elif ext in ["pdf", "epdf"]:
url = i.process_image(url, "png", dpi=300)
url = i.process_image(url, "png", source_dir, dpi=300)
elif ext in ["jpg"]:
url = i.process_image(url, "jpeg")
url = i.process_image(url, "jpeg", source_dir)
else:
url = i.process_image(url, ".png")
return f'<img src="{url}" {"style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}">'
url = i.process_image(url, "png", source_dir)
_, ext = os.path.splitext(url)
ext = ext[1:]
srcset = []
if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'):
# This is inspired by @vojta001's blogPhoto shortcode he made for
# patek.cz:
# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
width, height = i.get_image_size(url, [source_dir, i.public_dir])
sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (width, height, quality)
for size in sizes:
if width <= size[0] and height <= size[1]:
srcset.append((f'{i.web_path}/{url}', f'{width}w'))
break
quality = size[2] if ext == "jpeg" else None
srcset.append((f'{i.web_path}/{i.process_image(url, ext, source_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))
url = i.web_path + "/" + url
attributes = f'{" style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}"'
if len(srcset) != 0:
return f'<a href="{url}"><img src="{srcset[-1][0]}" srcset="{", ".join([" ".join(src) for src in srcset])}"{attributes}></a>'
else:
return f'<img src="{url}"{attributes}>'
if isinstance(e, Header):
tag = "h"+str(e.level)

28
images.py

@ -4,29 +4,28 @@ import shutil
import subprocess
class ImageProcessor:
def __init__(self, public_dir: str, *lookup_dirs: List[str]):
def __init__(self, public_dir: str, web_path: str, *lookup_dirs: List[str]):
self.public_dir = public_dir
self.lookup_dirs = lookup_dirs
self.web_path = web_path if web_path[-1] != "/" else web_path[:-1]
if not os.path.exists(self.public_dir):
os.mkdir(self.public_dir)
def process_image(self, input_filename: str, format: str, relative=True, width: int=None, height:int=None, quality: int=None, dpi: int=None) -> str:
def process_image(self, input_filename: str, format: str, source_dir: str, relative: bool=True, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True) -> str:
name = os.path.basename(input_filename)
base, ext = os.path.splitext(name)
ext = ext[1:]
full_path = self.find_image(input_filename)
full_path = self.find_image(input_filename, [source_dir])
if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.')
suffix = ""
geometry = None
if width is not None or height is not None:
geometry = f'{width if width is not None else ""}x{height if height is not None else ""}'
geometry = f'{width if width is not None else ""}x{height if height is not None else ""}{"" if fit else "!"}'
suffix += "_"+geometry
if quality is not None:
suffix += f'_q{quality}'
if quality is not None:
suffix += f'_d{dpi}'
target_name = base+suffix+"."+format
target_path = self.public_dir + "/" + target_name
@ -37,8 +36,8 @@ class ImageProcessor:
and width is None and height is None and quality is None and dpi is None):
shutil.copyfile(full_path, target_path)
elif self.find_image(target_name):
shutil.copyfile(self.find_image(target_name), target_path)
elif self.find_image(target_name, [source_dir]):
shutil.copyfile(self.find_image(target_name, [source_dir]), target_path)
elif ext == "svg":
width_arg = ['--export-width', str(width)] if width is not None else []
@ -51,15 +50,20 @@ class ImageProcessor:
resize_arg = ['-resize', str(geometry)] if geometry is not None else []
density_arg = ['-density', str(dpi)] if dpi is not None else []
quality_arg = ['-quality', str(quality)] if quality is not None else []
if subprocess.run(['convert', full_path, *resize_arg, *density_arg, *quality_arg, target_path]).returncode != 0:
if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0:
raise Exception(f"Could not convert '{full_path}' to '{format}'")
return target_name if relative else target_path
def get_image_size(self, input_filename: str, additional_dirs: List[str]=[]) -> (int, int):
    """Return the pixel dimensions of an image as a ``(width, height)`` tuple.

    The image is located with ``self.find_image``, searching the lookup
    directories plus ``additional_dirs``. Its size is read by running the
    external ``convert`` command (presumably ImageMagick's) with a
    ``-print '%w %h'`` format and parsing the two integers it prints.

    Raises:
        FileNotFoundError: if the image is not found in any searched directory.
        Exception: if ``convert`` exits with a non-zero status.
    """
    full_path = self.find_image(input_filename, additional_dirs)
    if full_path is None:
        raise FileNotFoundError(f'Image {input_filename} not found.')
    result = subprocess.run(['convert', full_path, '-print', '%w %h\\n', '/dev/null'], capture_output=True)
    # Fail here with a clear message instead of letting empty output turn
    # into an opaque ValueError where the caller unpacks the result.
    if result.returncode != 0:
        raise Exception(f"Could not read the size of '{full_path}'")
    # Only the first two fields are used, so extra output lines (e.g. from
    # multi-frame images) do not break the width/height unpacking.
    width, height = (int(x) for x in result.stdout.split()[:2])
    # Return a real tuple, as the annotation promises, not a lazy generator.
    return (width, height)
def find_image(self, input_filename) -> str:
for dir in self.lookup_dirs:
def find_image(self, input_filename: str, additional_dirs: List[str]=[]) -> str:
for dir in [*self.lookup_dirs, *additional_dirs]:
if os.path.isfile(dir + "/" + input_filename):
return dir + "/" + input_filename

0
test/logo.jpg → test/test-files/logo.jpg

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

0
test/test-import.md → test/test-files/test-import.md

14
test/test-partial.md → test/test-files/test-partial.md

@ -51,3 +51,17 @@ $$
\def\eqalign#1{NO, just, nooooo}
\eqalign{}
$$
![This is a figure, go figure...](logo.svg){width=25%}
![This is a figure, go figure...](logo.pdf){width=50%}
![This is a figure, go figure...](logo.jpg){width=50%}
![This is a figure, go figure...](logo1.png){width=10em}
![Fakt epesní reproduktor](reproduktor.jpeg){width=10em}
![Fakt epesní reproduktor](reproduktor.png){width=10em}

18
test/test.md

@ -4,7 +4,7 @@ subtitle: 'A subtitle'
are_we_there_yet: False
language: "en"
---
[#test-import.md]{}
[#test-files/test-import.md]{}
# Hello world!
@ -15,7 +15,7 @@ This is *very **strongly** emphasised*
Příliš žluťoučký kůň pěl dábelské ódy. *Příliš žluťoučký kůň pěl dábelské ódy.* **Příliš žluťoučký kůň pěl dábelské ódy.** ***Příliš žluťoučký kůň pěl dábelské ódy.***
:::{partial=test-partial.md}
:::{partial=test-files/test-partial.md}
:::
:::{if=cat}
@ -47,18 +47,6 @@ This should only be shown to cats the second time
# [$are_we_there_yet]{}
![This is a figure, go figure...](logo.svg){width=50%}
![This is a figure, go figure...](logo.pdf){width=50%}
![This is a figure, go figure...](logo.jpg){width=50%}
![This is a figure, go figure...](logo.png){width=10em}
![Fakt epesní reproduktor](reproduktor.jpeg){width=10em}
![Fakt epesní reproduktor](reproduktor.png){width=10em}
```python {.run}
ctx.set_metadata("language", "cs")
```
@ -105,7 +93,7 @@ $$
}
$$
:::{partial=test-partial.md}
:::{partial=test-files/test-partial.md}
:::
---

11
tex.py

@ -109,18 +109,19 @@ def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t"
if isinstance(e, Image):
url = e.url
source_dir = e.attributes["source_dir"]
_, ext = os.path.splitext(url)
ext = ext[1:]
if ext in ["pdf", "png", "jpeg"]:
url = i.process_image(url, ext, relative=False)
url = i.process_image(url, ext, source_dir, relative=False)
elif ext in ["svg"]:
url = i.process_image(url, "pdf", relative=False)
url = i.process_image(url, "pdf", source_dir, relative=False)
elif ext in ["epdf"]:
url = i.process_image(url, "pdf", relative=False)
url = i.process_image(url, "pdf", source_dir, relative=False)
elif ext in ["jpg"]:
url = i.process_image(url, "jpeg", relative=False)
url = i.process_image(url, "jpeg", source_dir, relative=False)
else:
url = i.process_image(url, "pdf", relative=False)
url = i.process_image(url, "pdf", source_dir, relative=False)
width = ""
if "width" in e.attributes:
width = e.attributes["width"]

9
transform.py

@ -40,7 +40,7 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
# This is for including content from files with their own flags and
# commands without affecting the state of the current document.
if (isinstance(e, Div)) and "partial" in e.attributes:
includedDoc = import_md(open(e.attributes["partial"], "r").read())
includedDoc = import_md(open(c.dir + "/" + e.attributes["partial"], "r").read())
nContext = Context(includedDoc, e.attributes["partial"], c)
language = includedDoc.get_metadata("language")
includedDoc = includedDoc.walk(transform, nContext)
@ -56,6 +56,11 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
}
e = FQuoted(*e.content, quote_type=e.quote_type, style=quote_styles[c.get_metadata("language")])
if isinstance(e, Image):
e.attributes["source_dir"] = c.dir
if not "no-srcset" in e.attributes:
e.attributes["no-srcset"] = c.get_metadata("no-srcset") if c.get_metadata("no-srcset") is not None else False
# Execute python code inside source code block
if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and "run" in e.classes:
e = Div(*executeCommand(e.text, None, c))
@ -84,7 +89,7 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
# This is the exact opposite of include. We take the commands
# and flags but drop the content.
elif re.match(r"^#.+$", e.content[0].text):
importedDoc = import_md(open(e.content[0].text[1:], "r").read())
importedDoc = import_md(open(c.dir + "/" + e.content[0].text[1:], "r").read())
importedDoc.walk(transform, c)
return nullify(e)

Loading…
Cancel
Save