Main file cleanup, image processing.

This commit is contained in:
Jan Černohorský 2023-02-06 01:00:45 +01:00
parent 1e2b306b15
commit 303dcfaa1f
14 changed files with 362 additions and 57 deletions

9
.gitignore vendored
View file

@ -1,2 +1,11 @@
**/__pycache__
output.*
*.log
*.aux
test/test.pdf
test/test.tex
public/
*.png
*.pdf
*.jpeg
*.svg

View file

@ -1,9 +1,10 @@
#!/usr/bin/env python3
import argparse
import re
import sys
from typing import List
import os
# Import local files
from transform import transform
@ -13,21 +14,30 @@ from group import Group
from katex import KatexClient
from html import html
from tex import tex
from images import ImageProcessor
from mj_show import show
doc = import_md(open(sys.argv[1], "r").read())
parser = argparse.ArgumentParser()
parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. By default contains the directory of the MarkDown file.", nargs="+", default=[])
parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", nargs=1, default="public")
parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", nargs=1, default="output.html")
parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", nargs=1, default="output.tex")
parser.add_argument("input_filename", help="The MarkDown file to process.")
args = parser.parse_args()
doc = import_md(open(args.input_filename, "r").read())
language = doc.get_metadata("language", None, True)
print(show(doc))
context = Context(doc, sys.argv[1])
doc = doc.walk(transform, context)
doc.content = [Group(*doc.content, metadata={"language":language})]
#print("---------------------")
#print(show(doc))
#print(convert_text(doc, input_format="panflute", output_format="markdown"))
katexClient = KatexClient()
#print(katexClient.render("\\def\\Bruh{K^A\\TeX}"))
#print(katexClient.render("\\Bruh"))
open("output.html", "w").write("<head> <meta charset='utf-8'> <link rel='stylesheet' href='https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css' integrity='sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0' crossorigin='anonymous'> </head>" + html(doc, katexClient))
open("output.tex", "w").write("\input formatitko.tex\n" + tex(doc))
#print(tex(doc))
doc_dir = os.path.dirname(args.input_filename) if os.path.dirname(args.input_filename) != "" else "."
imageProcessor = ImageProcessor(args.img_public_dir, doc_dir, *args.img_lookup_dirs)
open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
open(args.output_tex, "w").write(tex(doc, imageProcessor))

50
html.py
View file

@ -3,20 +3,22 @@ from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
import os
from whitespace import NBSP
from transform import FQuoted
from katex import KatexClient
from util import inlinify
from group import Group
from images import ImageProcessor
def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") -> str:
def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
return ""
if isinstance(e, ListContainer):
return ''.join([html(child, k, indent_level, indent_str) for child in e])
return ''.join([html(child, k, i, indent_level, indent_str) for child in e])
tag = e.tag.lower()
attributes = ""
@ -95,14 +97,24 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t")
return f'<pre>{e.text}</pre>'
if isinstance(e, Figure):
content_foot = html(e.caption, k, indent_level+1, indent_str)
content_foot = html(e.caption, k, i, indent_level+1, indent_str)
if isinstance(e, Caption):
tag = "figcaption"
if isinstance(e, Image):
# TODO: Image processing
return f'<img src="{e.url}" alt="{e.title or html(e.content, k, 0, "")}">'
url = e.url
_, ext = os.path.splitext(url)
ext = ext[1:]
if ext in ["svg", "png", "jpeg", "gif"]:
url = i.process_image(url, ext)
elif ext in ["pdf", "epdf"]:
url = i.process_image(url, "png", dpi=300)
elif ext in ["jpg"]:
url = i.process_image(url, "jpeg")
else:
url = i.process_image(url, ".png")
return f'<img src="{url}" {"style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}">'
if isinstance(e, Header):
tag = "h"+str(e.level)
@ -114,13 +126,13 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t")
attributes += f' title="{e.title}"'
if isinstance(e, LineItem):
return indent_level*indent_str + html(e.content, k) + "<br>\n"
return indent_level*indent_str + html(e.content, k, i) + "<br>\n"
if isinstance(e, Note):
content_head = "("
content_foot = ")"
if inlinify(e) is not None:
return f' <note>({html(inlinify(e), k, 0, "")})</note>'
return f' <note>({html(inlinify(e), k, i, 0, "")})</note>'
if isinstance(e, OrderedList):
tag = "ol"
@ -138,8 +150,8 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t")
# FIXME: Delimiter styles
if isinstance(e, Table):
content_head = html(e.head, k, indent_level+1, indent_str)
content_foot = html(e.foot, k, indent_level+1, indent_str)
content_head = html(e.head, k, i, indent_level+1, indent_str)
content_foot = html(e.foot, k, i, indent_level+1, indent_str)
# FIXME: Fancy pandoc tables, using colspec
if isinstance(e, TableCell):
@ -159,25 +171,25 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t")
if isinstance(e, FQuoted):
if e.style == "cs":
if e.quote_type == "SingleQuote":
return f'{html(e.content, k, 0, "")}'
return f'{html(e.content, k, i, 0, "")}'
elif e.quote_type == "DoubleQuote":
return f'{html(e.content, k, 0, "")}'
return f'{html(e.content, k, i, 0, "")}'
elif e.style == "en":
if e.quote_type == "SingleQuote":
return f'{html(e.content, k, 0, "")}'
return f'{html(e.content, k, i, 0, "")}'
elif e.quote_type == "DoubleQuote":
return f'{html(e.content, k, 0, "")}'
return f'{html(e.content, k, i, 0, "")}'
else:
if e.quote_type == "SingleQuote":
return f'\'{html(e.content, k, 0, "")}\''
return f'\'{html(e.content, k, i, 0, "")}\''
elif e.quote_type == "DoubleQuote":
return f'"{html(e.content, k, 0, "")}"'
return f'"{html(e.content, k, i, 0, "")}"'
else:
return f'"{html(e.content, k, 0, "")}"'
return f'"{html(e.content, k, i, 0, "")}"'
if isinstance(e, Group):
k.begingroup()
ret = html(e.content, k, indent_level, indent_str)
ret = html(e.content, k, i, indent_level, indent_str)
k.endgroup()
return ret
@ -204,7 +216,7 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t")
return ""
if isinstance(e, Inline):
return f'<{tag}{attributes}>{content_head}{html(e.content, k, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}</{tag}>'
return f'<{tag}{attributes}>{content_head}{html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}</{tag}>'
out_str = ""
if not isinstance(e, Plain):
@ -213,7 +225,7 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t")
if hasattr(e, "_content"):
if len(e.content) > 0 and isinstance(e.content[0], Inline):
out_str += (indent_level+1)*indent_str
out_str += html(e.content, k, indent_level+1, indent_str)
out_str += html(e.content, k, i, indent_level+1, indent_str)
if hasattr(e, "text"):
out_str += e.text
out_str += f"{content_foot}\n"

65
images.py Normal file
View file

@ -0,0 +1,65 @@
from typing import List
import os
import shutil
import subprocess
class ImageProcessor:
    """Find source images and publish (optionally converted) copies of them.

    Images are searched for in ``lookup_dirs`` and the results are written
    into ``public_dir``. Files that already exist in ``public_dir`` are never
    overwritten, so repeated runs reuse earlier conversions.
    """

    def __init__(self, public_dir: str, *lookup_dirs: str):
        """Remember the directories and make sure ``public_dir`` exists.

        Args:
            public_dir: Directory that receives the processed images.
            *lookup_dirs: Directories searched, in order, for source images.
        """
        self.public_dir = public_dir
        self.lookup_dirs = lookup_dirs
        # makedirs also creates missing parent directories and does not race
        # with a separate existence check.
        os.makedirs(self.public_dir, exist_ok=True)

    def process_image(self, input_filename: str, format: str, relative=True, width: int=None, height: int=None, quality: int=None, dpi: int=None) -> str:
        """Publish ``input_filename`` in the requested format and return its name.

        The target file name is the source base name plus a suffix encoding
        every requested option (``_<width>x<height>``, ``_q<quality>``,
        ``_d<dpi>``) so that differently processed variants do not collide.

        Args:
            input_filename: Image path, resolved against the lookup directories.
            format: Target file extension without the leading dot (e.g. "png").
            relative: Return only the target file name when true, otherwise
                the path inside ``public_dir``.
            width: Optional target width.
            height: Optional target height.
            quality: Optional output quality passed to ``convert``.
            dpi: Optional resolution used when rasterising.

        Returns:
            The target file name, or ``public_dir + "/" + name`` when
            ``relative`` is false.

        Raises:
            FileNotFoundError: The source image is in no lookup directory.
            RuntimeError: The external converter exited with a non-zero status.
        """
        name = os.path.basename(input_filename)
        base, ext = os.path.splitext(name)
        ext = ext[1:]

        full_path = self.find_image(input_filename)
        if full_path is None:
            raise FileNotFoundError(f'Image {input_filename} not found.')

        suffix = ""
        geometry = None
        if width is not None or height is not None:
            geometry = f'{width if width is not None else ""}x{height if height is not None else ""}'
            suffix += "_" + geometry
        if quality is not None:
            suffix += f'_q{quality}'
        # The dpi suffix must depend on dpi itself (it used to test quality),
        # otherwise a dpi-only request collides with the unprocessed name.
        if dpi is not None:
            suffix += f'_d{dpi}'

        target_name = base + suffix + "." + format
        target_path = self.public_dir + "/" + target_name

        if not os.path.isfile(target_path):
            unmodified = width is None and height is None and quality is None and dpi is None
            same_format = (
                ext == format
                or (ext == "epdf" and format == "pdf")
                or (ext == "jpg" and format == "jpeg")
            )
            if same_format and unmodified:
                # Nothing to convert: the source already is what was asked for.
                shutil.copyfile(full_path, target_path)
            else:
                # A hand-prepared variant in a lookup directory wins over
                # running a converter.
                prepared = self.find_image(target_name)
                if prepared is not None:
                    shutil.copyfile(prepared, target_path)
                elif ext == "svg":
                    width_arg = ['--export-width', str(width)] if width is not None else []
                    height_arg = ['--export-height', str(height)] if height is not None else []
                    dpi_arg = ['--export-dpi', str(dpi)] if dpi is not None else []
                    if subprocess.run(['inkscape', full_path, '-o', target_path, *width_arg, *height_arg, *dpi_arg]).returncode != 0:
                        raise RuntimeError(f"Could not convert '{full_path}' to '{format}'")
                else:
                    resize_arg = ['-resize', str(geometry)] if geometry is not None else []
                    density_arg = ['-density', str(dpi)] if dpi is not None else []
                    quality_arg = ['-quality', str(quality)] if quality is not None else []
                    # -density is an input setting: it has to precede the
                    # source file to affect how vector input is rasterised.
                    if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0:
                        raise RuntimeError(f"Could not convert '{full_path}' to '{format}'")

        return target_name if relative else target_path

    def find_image(self, input_filename) -> "str | None":
        """Return the first ``<lookup_dir>/<input_filename>`` that is a file.

        Returns ``None`` when no lookup directory contains the file.
        """
        for directory in self.lookup_dirs:
            candidate = directory + "/" + input_filename
            if os.path.isfile(candidate):
                return candidate
        return None

View file

@ -14,7 +14,7 @@ class KatexClient:
self._client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
self._temp_dir = tempfile.TemporaryDirectory(prefix='formatitko')
self._socket_file = self._temp_dir.name + "/katex-socket"
self._server_process = subprocess.Popen(["node", "./katex-server/index.mjs", self._socket_file])
self._server_process = subprocess.Popen(["node", os.path.dirname(os.path.realpath(__file__)) + "/katex-server/index.mjs", self._socket_file])
while not os.path.exists(self._socket_file):
pass
while True:

View file

@ -47,7 +47,7 @@ This should only be shown to cats the second time
# [$are_we_there_yet]{}
![This is a figure, go figure...](/tmp/logo.pdf){width=10em}
![This is a figure, go figure...](/tmp/logo.pdf)
![This is a figure, go figure...](/tmp/logo.jpg){width=10em}

13
test/Makefile Normal file
View file

@ -0,0 +1,13 @@
# Builds the formatitko test document: a PDF via luatex and an HTML page
# under public/.

# `all` names no file, so it must never be considered up to date.
.PHONY: all
all: test.pdf public/test.html

# A single formatitko run writes both outputs; rerun it when the source changes.
output.tex output.html: test.md
	../formatitko.py test.md

# Prepend the static HTML header to the generated body.
public/test.html: test-top.html output.html
	mkdir -p public
	cat test-top.html output.html > public/test.html

# Prepend the TeX macro file to the generated body.
test.tex: test-top.tex output.tex
	cat test-top.tex output.tex > test.tex

test.pdf: test.tex
	TEXINPUTS=.:../ucwmac:${TEXINPUTS} luatex -halt-on-error -interaction nonstopmode test.tex

BIN
test/logo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

8
test/test-top.html Normal file
View file

@ -0,0 +1,8 @@
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta charset='utf-8'>
<link rel='stylesheet' href='https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css' integrity='sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0' crossorigin='anonymous'>
</head>
<body>

1
test/test-top.tex Symbolic link
View file

@ -0,0 +1 @@
../formatitko.tex

166
test/test.md Normal file
View file

@ -0,0 +1,166 @@
---
title: 'Wooooo a title'
subtitle: 'A subtitle'
are_we_there_yet: False
language: "en"
---
[#test-import.md]{}
# Hello world!
This is an *example* **yay**!
This is *very **strongly** emphasised*
Příliš žluťoučký kůň pěl dábelské ódy. *Příliš žluťoučký kůň pěl dábelské ódy.* **Příliš žluťoučký kůň pěl dábelské ódy.** ***Příliš žluťoučký kůň pěl dábelské ódy.***
:::{partial=test-partial.md}
:::
:::{if=cat}
This should only be shown to cats
:::
```python {.run}
ctx.set_flag("cat", True)
```
```python {.run}
println(f"The main document's title is '{ctx.get_metadata('title')}'")
ctx.set_metadata("a", {})
ctx.set_metadata("a.b", {})
ctx.set_metadata("a.b.c", "Bruh **bruh** bruh")
```
```python {style=native}
def bruh(no):
wat
```
Inline `code`
::::{if=cat}
This should only be shown to cats the second time
::::
# [$are_we_there_yet]{}
![This is a figure, go figure...](logo.svg){width=50%}
![This is a figure, go figure...](logo.pdf){width=50%}
![This is a figure, go figure...](logo.jpg){width=50%}
![This is a figure, go figure...](logo.png){width=10em}
![Fakt epesní reproduktor](reproduktor.jpeg){width=10em}
![Fakt epesní reproduktor](reproduktor.png){width=10em}
```python {.run}
ctx.set_metadata("language", "cs")
```
[!opendatatask]{}
```python {.run}
ctx.set_metadata("language","en")
```
[This too!]{if=cat}
[What]{.co}
[An inline command with contents and **bold** and another [!nop]{} inside!]{c=nop}
[!nop]{a=b}<!-- A special command! WOW -->
> OOO a blockquote mate init
>
>> Nesting??
>> Woah
A non-breakable&nbsp;space bro
A lot of spaces
A text with some inline math: $\sum_{i=1}^nn^2$. Plus some display math:
A link with the link in the link: <https://bruh.com>
H~2~O is a liquid. 2^10^ is 1024.
[Underline]{.underline}
:::{only=html}
$$
\def\eqalign#1{\begin{align*}#1\end{align*}}
$$
:::
$$
\eqalign{
2 x_2 + 6 x_3 &= 14 \cr
x_1 - 3 x_2 + 2 x_3 &= 5 \cr
-x_1 + 4 x_2 + \phantom{1} x_3 &= 2
}
$$
:::{partial=test-partial.md}
:::
---
This should be seen by all.^[This is a footnote]
| Matematicko-fyzikální fakulta University Karlovy
| Malostranské nám. 2/25
| 118 00 Praha 1
More footnotes.^[I am a foot]
To Do:
- buy eggs
- buy milk
- ???
- profit
- also create sublists preferably
1. Woah
2. Wooo
3. no
4) WOO
``` {=html}
<figure>
<video src="woah.mp4" autoplay></video>
<figcaption> This is indeed a video </figcaption>
</figure>
```
#. brum
#. BRUHHH
#. woah
i. bro
ii. wym bro
+---------------------+-----------------------+
| Location | Temperature 1961-1990 |
| | in degree Celsius |
+---------------------+-------+-------+-------+
| | min | mean | max |
+=====================+=======+=======+======:+
| Antarctica | -89.2 | N/A | 19.8 |
+---------------------+-------+-------+-------+
| Earth | -89.2 | 14 | 56.7 |
+---------------------+-------+-------+-------+
------- ------ ---------- -------
12 12 12 12
123 123 123 123
1 1 1 1
------- ------ ---------- -------

71
tex.py
View file

@ -1,18 +1,20 @@
from panflute import *
import os
from whitespace import NBSP
from transform import FQuoted
from util import inlinify
from group import Group
from images import ImageProcessor
# Heavily inspired by: git://git.ucw.cz/labsconf2022.git
def tex(e, indent_level: int=0, indent_str: str="\t") -> str:
def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex":
return ""
if isinstance(e, ListContainer):
return ''.join([tex(child, indent_level, indent_str) for child in e])
return ''.join([tex(child, i, indent_level, indent_str) for child in e])
content_foot = ""
content_head = ""
@ -55,26 +57,26 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str:
return e.text.replace(" ", "~").replace("&nbsp;", "~")
if isinstance(e, Para):
return tex(e.content, 0, "")+"\n\n"
return tex(e.content, i, 0, "")+"\n\n"
if isinstance(e, FQuoted):
if e.style == "cs":
if e.quote_type == "SingleQuote":
return f'{tex(e.content, 0, "")}'
return f'{tex(e.content, i, 0, "")}'
elif e.quote_type == "DoubleQuote":
return f'{tex(e.content, 0, "")}'
return f'{tex(e.content, i, 0, "")}'
elif e.style == "en":
if e.quote_type == "SingleQuote":
return f'{tex(e.content, 0, "")}'
return f'{tex(e.content, i, 0, "")}'
elif e.quote_type == "DoubleQuote":
return f'{tex(e.content, 0, "")}'
return f'{tex(e.content, i, 0, "")}'
else:
if e.quote_type == "SingleQuote":
return f'\'{tex(e.content, 0, "")}\''
return f'\'{tex(e.content, i, 0, "")}\''
elif e.quote_type == "DoubleQuote":
return f'"{tex(e.content, 0, "")}"'
return f'"{tex(e.content, i, 0, "")}"'
else:
return f'"{tex(e.content, 0, "")}"'
return f'"{tex(e.content, i, 0, "")}"'
if isinstance(e, BulletList):
tag = "list"
@ -106,14 +108,33 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str:
# FIXME: Starting number of list
if isinstance(e, Image):
return f'\\image{{width {e.attributes["width"] if "width" in e.attributes else ""}}}{{{e.url}}}'
url = e.url
_, ext = os.path.splitext(url)
ext = ext[1:]
if ext in ["pdf", "png", "jpeg"]:
url = i.process_image(url, ext, relative=False)
elif ext in ["svg"]:
url = i.process_image(url, "pdf", relative=False)
elif ext in ["epdf"]:
url = i.process_image(url, "pdf", relative=False)
elif ext in ["jpg"]:
url = i.process_image(url, "jpeg", relative=False)
else:
url = i.process_image(url, "pdf", relative=False)
width = ""
if "width" in e.attributes:
width = e.attributes["width"]
if e.attributes["width"][-1] == "%":
width = str(int(e.attributes["width"][:-1])/100) + "\\hsize"
width = "width " + width
return f'\\image{{{width}}}{{{url}}}'
if isinstance(e, Figure):
return f'\\figure{{{tex(e.content, indent_level+1, indent_str)}}}{{{tex(e.caption, indent_level+1, indent_str)}}}\n\n'
return f'\\figure{{{tex(e.content, i, indent_level+1, indent_str)}}}{{{tex(e.caption, i, indent_level+1, indent_str)}}}\n\n'
if isinstance(e, Caption):
if inlinify(e) is not None:
return f'\\caption{{{tex(e.content, 0, "")}}}'
return f'\\caption{{{tex(e.content, i, 0, "")}}}'
if isinstance(e, ListItem):
tag = ":"
@ -134,7 +155,7 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str:
if isinstance(e, Note):
tag = "fn"
if inlinify(e) is not None:
return f'\\fn{{{tex(inlinify(e), 0, "")}}}'
return f'\\fn{{{tex(inlinify(e), i, 0, "")}}}'
if isinstance(e, Table):
aligns = {
@ -144,16 +165,16 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str:
"AlignDefault": "\\quad#\\quad\\hfil"
}
text = "\strut"+"&".join([aligns[col[0]] for col in e.colspec])+"\cr\n"
text += tex(e.head.content, 0, "")
text += tex(e.head.content, i, 0, "")
text += "\\noalign{\\hrule}\n"
text += tex(e.content[0].content, 0, "")
text += tex(e.content[0].content, i, 0, "")
text += "\\noalign{\\hrule}\n"
text += tex(e.foot.content, 0, "")
text += tex(e.foot.content, i, 0, "")
return "\\vskip1em\n\\halign{"+text+"}\n\\vskip1em\n"
# FIXME: Implement rowspan
if isinstance(e, TableRow):
return "&".join([("\\multispan"+str(cell.colspan)+" " if cell.colspan > 1 else "")+tex(cell.content, 0, "") for cell in e.content])+"\cr\n"
return "&".join([("\\multispan"+str(cell.colspan)+" " if cell.colspan > 1 else "")+tex(cell.content, i, 0, "") for cell in e.content])+"\cr\n"
if isinstance(e, RawInline):
if e.format == "tex":
@ -168,13 +189,13 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str:
return ""
if isinstance(e, Span) or isinstance(e, Plain):
return tex(e.content, 0, "")
return tex(e.content, i, 0, "")
if isinstance(e, LineItem):
return tex(e.content, 0, "") + ("\\\\\n" if e.next else "\n")
return tex(e.content, i, 0, "") + ("\\\\\n" if e.next else "\n")
if isinstance(e, LineBlock):
return f'{tex(e.content, indent_level+1, indent_str)}\n'
return f'{tex(e.content, i, indent_level+1, indent_str)}\n'
if isinstance(e, Group):
tag = "begingroup"
@ -184,19 +205,19 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str:
close = "\\endgroup"
if isinstance(e, Div):
return f'{tex(e.content, indent_level+1, indent_str)}'
return f'{tex(e.content, i, indent_level+1, indent_str)}'
if isinstance(e, Doc):
return tex(e.content, indent_level, indent_str)+"\n\\bye"
return tex(e.content, i, indent_level, indent_str)+"\n\\bye"
if isinstance(e, Inline):
return f'\\{tag}{arguments}{open}{content_head}{tex(e.content, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}{close}'
return f'\\{tag}{arguments}{open}{content_head}{tex(e.content, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}{close}'
out_str = ""
out_str = f"\\{tag}{arguments}{open}\n"
out_str += content_head
if hasattr(e, "_content"):
out_str += tex(e.content, indent_level+1, indent_str)
out_str += tex(e.content, i, indent_level+1, indent_str)
if hasattr(e, "text"):
out_str += e.text
out_str += f"{content_foot}\n{close}\n\n"