diff --git a/.gitignore b/.gitignore index 96223e3..8aa3d05 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,11 @@ **/__pycache__ output.* +*.log +*.aux +test/test.pdf +test/test.tex +public/ +*.png +*.pdf +*.jpeg +*.svg diff --git a/formatitko.py b/formatitko.py index 72dbb7c..f2e54df 100755 --- a/formatitko.py +++ b/formatitko.py @@ -1,9 +1,10 @@ #!/usr/bin/env python3 - +import argparse import re import sys from typing import List +import os # Import local files from transform import transform @@ -13,21 +14,30 @@ from group import Group from katex import KatexClient from html import html from tex import tex +from images import ImageProcessor from mj_show import show -doc = import_md(open(sys.argv[1], "r").read()) +parser = argparse.ArgumentParser() +parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. By default contains the directory of the MarkDown file.", nargs="+", default=[]) +parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. 
The program will not overwrite existing images.", default="public") # single value stays a str; nargs=1 would yield a one-element list +parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", default="output.html") +parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", default="output.tex") +parser.add_argument("input_filename", help="The MarkDown file to process.") +args = parser.parse_args() + +doc = import_md(open(args.input_filename, "r").read()) + language = doc.get_metadata("language", None, True) -print(show(doc)) -context = Context(doc, sys.argv[1]) +context = Context(doc, args.input_filename) # sys.argv[1] can be an option flag now + doc = doc.walk(transform, context) + doc.content = [Group(*doc.content, metadata={"language":language})] -#print("---------------------") -#print(show(doc)) -#print(convert_text(doc, input_format="panflute", output_format="markdown")) + katexClient = KatexClient() -#print(katexClient.render("\\def\\Bruh{K^A\\TeX}")) -#print(katexClient.render("\\Bruh")) -open("output.html", "w").write(" " + html(doc, katexClient)) -open("output.tex", "w").write("\input formatitko.tex\n" + tex(doc)) -#print(tex(doc)) +doc_dir = os.path.dirname(args.input_filename) or "." 
+imageProcessor = ImageProcessor(args.img_public_dir, doc_dir, *args.img_lookup_dirs) + +open(args.output_html, "w").write(html(doc, katexClient, imageProcessor)) +open(args.output_tex, "w").write(tex(doc, imageProcessor)) diff --git a/html.py b/html.py index 655250c..e9eb063 100644 --- a/html.py +++ b/html.py @@ -3,20 +3,22 @@ from pygments import highlight from pygments.lexers import get_lexer_by_name from pygments.formatters import HtmlFormatter from pygments.util import ClassNotFound +import os from whitespace import NBSP from transform import FQuoted from katex import KatexClient from util import inlinify from group import Group +from images import ImageProcessor -def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") -> str: +def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str: if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html": return "" if isinstance(e, ListContainer): - return ''.join([html(child, k, indent_level, indent_str) for child in e]) + return ''.join([html(child, k, i, indent_level, indent_str) for child in e]) tag = e.tag.lower() attributes = "" @@ -95,14 +97,24 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") return f'
{e.text}
' if isinstance(e, Figure): - content_foot = html(e.caption, k, indent_level+1, indent_str) + content_foot = html(e.caption, k, i, indent_level+1, indent_str) if isinstance(e, Caption): tag = "figcaption" if isinstance(e, Image): - # TODO: Image processing - return f'{e.title or html(e.content, k, 0, ' + url = e.url + _, ext = os.path.splitext(url) + ext = ext[1:] + if ext in ["svg", "png", "jpeg", "gif"]: + url = i.process_image(url, ext) + elif ext in ["pdf", "epdf"]: + url = i.process_image(url, "png", dpi=300) + elif ext in ["jpg"]: + url = i.process_image(url, "jpeg") + else: + url = i.process_image(url, "png") # format name without the dot; process_image inserts the separator itself + return f'{e.title or html(e.content, k, i, 0, ' if isinstance(e, Header): tag = "h"+str(e.level) @@ -114,13 +126,13 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") attributes += f' title="{e.title}"' if isinstance(e, LineItem): - return indent_level*indent_str + html(e.content, k) + "
\n" + return indent_level*indent_str + html(e.content, k, i) + "
\n" if isinstance(e, Note): content_head = "(" content_foot = ")" if inlinify(e) is not None: - return f' ({html(inlinify(e), k, 0, "")})' + return f' ({html(inlinify(e), k, i, 0, "")})' if isinstance(e, OrderedList): tag = "ol" @@ -138,8 +150,8 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") # FIXME: Delimeter styles if isinstance(e, Table): - content_head = html(e.head, k, indent_level+1, indent_str) - content_foot = html(e.foot, k, indent_level+1, indent_str) + content_head = html(e.head, k, i, indent_level+1, indent_str) + content_foot = html(e.foot, k, i, indent_level+1, indent_str) # FIXME: Fancy pandoc tables, using colspec if isinstance(e, TableCell): @@ -159,25 +171,25 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") if isinstance(e, FQuoted): if e.style == "cs": if e.quote_type == "SingleQuote": - return f'‚{html(e.content, k, 0, "")}‘' + return f'‚{html(e.content, k, i, 0, "")}‘' elif e.quote_type == "DoubleQuote": - return f'„{html(e.content, k, 0, "")}“' + return f'„{html(e.content, k, i, 0, "")}“' elif e.style == "en": if e.quote_type == "SingleQuote": - return f'‘{html(e.content, k, 0, "")}’' + return f'‘{html(e.content, k, i, 0, "")}’' elif e.quote_type == "DoubleQuote": - return f'“{html(e.content, k, 0, "")}”' + return f'“{html(e.content, k, i, 0, "")}”' else: if e.quote_type == "SingleQuote": - return f'\'{html(e.content, k, 0, "")}\'' + return f'\'{html(e.content, k, i, 0, "")}\'' elif e.quote_type == "DoubleQuote": - return f'"{html(e.content, k, 0, "")}"' + return f'"{html(e.content, k, i, 0, "")}"' else: - return f'"{html(e.content, k, 0, "")}"' + return f'"{html(e.content, k, i, 0, "")}"' if isinstance(e, Group): k.begingroup() - ret = html(e.content, k, indent_level, indent_str) + ret = html(e.content, k, i, indent_level, indent_str) k.endgroup() return ret @@ -204,7 +216,7 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") return "" if 
isinstance(e, Inline): - return f'<{tag}{attributes}>{content_head}{html(e.content, k, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}' + return f'<{tag}{attributes}>{content_head}{html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}' out_str = "" if not isinstance(e, Plain): @@ -213,7 +225,7 @@ def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") if hasattr(e, "_content"): if len(e.content) > 0 and isinstance(e.content[0], Inline): out_str += (indent_level+1)*indent_str - out_str += html(e.content, k, indent_level+1, indent_str) + out_str += html(e.content, k, i, indent_level+1, indent_str) if hasattr(e, "text"): out_str += e.text out_str += f"{content_foot}\n" diff --git a/images.py b/images.py new file mode 100644 index 0000000..0444aba --- /dev/null +++ b/images.py @@ -0,0 +1,70 @@ +from typing import List +import os +import shutil +import subprocess + +class ImageProcessor: + """Finds images in the lookup directories and copies or converts them into the public directory.""" + def __init__(self, public_dir: str, *lookup_dirs: str): + self.public_dir = public_dir + self.lookup_dirs = lookup_dirs + os.makedirs(self.public_dir, exist_ok=True) + + def process_image(self, input_filename: str, format: str, relative=True, width: int=None, height:int=None, quality: int=None, dpi: int=None) -> str: + """Put input_filename, converted to format, into the public directory unless the target file already exists. Returns the target file name, or its path when relative is False. Raises FileNotFoundError when the image is in no lookup directory.""" + name = os.path.basename(input_filename) + base, ext = os.path.splitext(name) + ext = ext[1:] + full_path = self.find_image(input_filename) + if full_path is None: + raise FileNotFoundError(f'Image {input_filename} not found.') + + suffix = "" + geometry = None + if width is not None or height is not None: + geometry = f'{width if width is not None else ""}x{height if height is not None else ""}' + suffix += "_"+geometry + if quality is not None: + suffix += f'_q{quality}' + if dpi is not None: + suffix += f'_d{dpi}' + target_name = base+suffix+"."+format + target_path = self.public_dir + "/" + target_name + 
+ if not os.path.isfile(target_path): + # Equivalent format and no processing requested: a plain copy is enough. + if (((ext == format) + or (ext == "epdf" and format == "pdf") + or (ext == "jpg" and format == "jpeg")) + and width is None and height is None and quality is None and dpi is None): + shutil.copyfile(full_path, target_path) + + elif self.find_image(target_name): + shutil.copyfile(self.find_image(target_name), target_path) + + elif ext == "svg": + width_arg = ['--export-width', str(width)] if width is not None else [] + height_arg = ['--export-height', str(height)] if height is not None else [] + dpi_arg = ['--export-dpi', str(dpi)] if dpi is not None else [] + if subprocess.run(['inkscape', full_path, '-o', target_path, *width_arg, *height_arg, *dpi_arg]).returncode != 0: + raise Exception(f"Could not convert '{full_path}' to '{format}'") + + else: + resize_arg = ['-resize', str(geometry)] if geometry is not None else [] + density_arg = ['-density', str(dpi)] if dpi is not None else [] + quality_arg = ['-quality', str(quality)] if quality is not None else [] + # The density setting has to precede the source file, otherwise vector inputs (PDF) are rasterised at the default resolution. + if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0: + raise Exception(f"Could not convert '{full_path}' to '{format}'") + + return target_name if relative else target_path + + + + + def find_image(self, input_filename) -> str: + """Return the path of input_filename inside the first lookup directory that contains it, or None when there is none.""" + for directory in self.lookup_dirs: + if os.path.isfile(directory + "/" + input_filename): + return directory + "/" + input_filename + return None diff --git a/katex.py b/katex.py index cac875a..7879e7e 100644 --- a/katex.py +++ b/katex.py @@ -14,7 +14,7 @@ class KatexClient: self._client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) self._temp_dir = tempfile.TemporaryDirectory(prefix='formatitko') self._socket_file = self._temp_dir.name + "/katex-socket" - self._server_process = subprocess.Popen(["node", "./katex-server/index.mjs", self._socket_file]) + self._server_process = subprocess.Popen(["node", os.path.dirname(os.path.realpath(__file__)) + "/katex-server/index.mjs", self._socket_file]) while not 
os.path.exists(self._socket_file): pass while True: diff --git a/test.md b/test.md index b4eb439..692aaa1 100644 --- a/test.md +++ b/test.md @@ -47,7 +47,7 @@ This should only be shown to cats the second time # [$are_we_there_yet]{} -![This is a figure, go figure...](/tmp/logo.pdf){width=10em} +![This is a figure, go figure...](/tmp/logo.pdf) ![This is a figure, go figure...](/tmp/logo.jpg){width=10em} diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..455c41e --- /dev/null +++ b/test/Makefile @@ -0,0 +1,13 @@ +all: test.pdf public/test.html + +output.tex output.html: + ../formatitko.py test.md + +public/test.html: output.html + cat test-top.html output.html > public/test.html + +test.tex: output.tex + cat test-top.tex output.tex > test.tex + +test.pdf: test.tex + TEXINPUTS=.:../ucwmac:${TEXINPUTS} luatex -halt-on-error -interaction nonstopmode test.tex diff --git a/test/logo.jpg b/test/logo.jpg new file mode 100644 index 0000000..544393e Binary files /dev/null and b/test/logo.jpg differ diff --git a/test-import.md b/test/test-import.md similarity index 100% rename from test-import.md rename to test/test-import.md diff --git a/test-partial.md b/test/test-partial.md similarity index 100% rename from test-partial.md rename to test/test-partial.md diff --git a/test/test-top.html b/test/test-top.html new file mode 100644 index 0000000..0cea640 --- /dev/null +++ b/test/test-top.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/test/test-top.tex b/test/test-top.tex new file mode 120000 index 0000000..1bae400 --- /dev/null +++ b/test/test-top.tex @@ -0,0 +1 @@ +../formatitko.tex \ No newline at end of file diff --git a/test/test.md b/test/test.md new file mode 100644 index 0000000..9fa68a7 --- /dev/null +++ b/test/test.md @@ -0,0 +1,166 @@ +--- +title: 'Wooooo a title' +subtitle: 'A subtitle' +are_we_there_yet: False +language: "en" +--- +[#test-import.md]{} + +# Hello world! + +This is an *example* **yay**! 
+ +This is *very **strongly** emphasised* + +Příliš žluťoučký kůň pěl dábelské ódy. *Příliš žluťoučký kůň pěl dábelské ódy.* **Příliš žluťoučký kůň pěl dábelské ódy.** ***Příliš žluťoučký kůň pěl dábelské ódy.*** + + +:::{partial=test-partial.md} +::: + +:::{if=cat} +This should only be shown to cats +::: + + +```python {.run} +ctx.set_flag("cat", True) +``` + +```python {.run} +println(f"The main document's title is '{ctx.get_metadata('title')}'") +ctx.set_metadata("a", {}) +ctx.set_metadata("a.b", {}) +ctx.set_metadata("a.b.c", "Bruh **bruh** bruh") +``` + +```python {style=native} +def bruh(no): + wat +``` + +Inline `code` + +::::{if=cat} +This should only be shown to cats the second time +:::: + +# [$are_we_there_yet]{} + +![This is a figure, go figure...](logo.svg){width=50%} + +![This is a figure, go figure...](logo.pdf){width=50%} + +![This is a figure, go figure...](logo.jpg){width=50%} + +![This is a figure, go figure...](logo.png){width=10em} + +![Fakt epesní reproduktor](reproduktor.jpeg){width=10em} + +![Fakt epesní reproduktor](reproduktor.png){width=10em} + +```python {.run} +ctx.set_metadata("language", "cs") +``` +[!opendatatask]{} +```python {.run} +ctx.set_metadata("language","en") +``` +[This too!]{if=cat} + +[What]{.co} + +[An inline command with contents and **bold** and another [!nop]{} inside!]{c=nop} + +[!nop]{a=b} + +> OOO a blockquote mate init +> +>> Nesting?? +>> Woah + +A non-breakable space bro + +A lot of spaces + +A text with some inline math: $\sum_{i=1}^nn^2$. Plus some display math: + +A link with the link in the link: + +H~2~O is a liquid. 2^10^ is 1024. 
+ +[Underline]{.underline} + +:::{only=html} +$$ +\def\eqalign#1{\begin{align*}#1\end{align*}} +$$ +::: + +$$ +\eqalign{ + 2 x_2 + 6 x_3 &= 14 \cr + x_1 - 3 x_2 + 2 x_3 &= 5 \cr + -x_1 + 4 x_2 + \phantom{1} x_3 &= 2 +} +$$ + +:::{partial=test-partial.md} +::: + +--- + +This should be seen by all.^[This is a footnote] + +| Matematicko-fyzikální fakulta Univerzity Karlovy +| Malostranské nám. 2/25 +| 118 00 Praha 1 + +More footnotes.^[I am a foot] + +To Do: + +- buy eggs +- buy milk +- ??? +- profit + - also create sublists preferably + +1. Woah +2. Wooo +3. no + +4) WOO + +``` {=html} +
+ +
This is indeed a video
+
+``` + +#. brum +#. BRUHHH +#. woah + +i. bro +ii. wym bro + + ++---------------------+-----------------------+ +| Location | Temperature 1961-1990 | +| | in degree Celsius | ++---------------------+-------+-------+-------+ +| | min | mean | max | ++=====================+=======+=======+======:+ +| Antarctica | -89.2 | N/A | 19.8 | ++---------------------+-------+-------+-------+ +| Earth | -89.2 | 14 | 56.7 | ++---------------------+-------+-------+-------+ + +------- ------ ---------- ------- + 12 12 12 12 + 123 123 123 123 + 1 1 1 1 +------- ------ ---------- ------- + diff --git a/tex.py b/tex.py index e884e24..ff7b950 100644 --- a/tex.py +++ b/tex.py @@ -1,18 +1,20 @@ from panflute import * +import os from whitespace import NBSP from transform import FQuoted from util import inlinify from group import Group +from images import ImageProcessor # Heavily inspired by: git://git.ucw.cz/labsconf2022.git -def tex(e, indent_level: int=0, indent_str: str="\t") -> str: +def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str: if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex": return "" if isinstance(e, ListContainer): - return ''.join([tex(child, indent_level, indent_str) for child in e]) + return ''.join([tex(child, i, indent_level, indent_str) for child in e]) content_foot = "" content_head = "" @@ -55,26 +57,26 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str: return e.text.replace(" ", "~").replace(" ", "~") if isinstance(e, Para): - return tex(e.content, 0, "")+"\n\n" + return tex(e.content, i, 0, "")+"\n\n" if isinstance(e, FQuoted): if e.style == "cs": if e.quote_type == "SingleQuote": - return f'‚{tex(e.content, 0, "")}‘' + return f'‚{tex(e.content, i, 0, "")}‘' elif e.quote_type == "DoubleQuote": - return f'„{tex(e.content, 0, "")}“' + return f'„{tex(e.content, i, 0, "")}“' elif e.style == "en": if e.quote_type == "SingleQuote": - return f'‘{tex(e.content, 0, "")}’' + return 
f'‘{tex(e.content, i, 0, "")}’' elif e.quote_type == "DoubleQuote": - return f'“{tex(e.content, 0, "")}”' + return f'“{tex(e.content, i, 0, "")}”' else: if e.quote_type == "SingleQuote": - return f'\'{tex(e.content, 0, "")}\'' + return f'\'{tex(e.content, i, 0, "")}\'' elif e.quote_type == "DoubleQuote": - return f'"{tex(e.content, 0, "")}"' + return f'"{tex(e.content, i, 0, "")}"' else: - return f'"{tex(e.content, 0, "")}"' + return f'"{tex(e.content, i, 0, "")}"' if isinstance(e, BulletList): tag = "list" @@ -106,14 +108,33 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str: # FIXME: Starting number of list if isinstance(e, Image): - return f'\\image{{width {e.attributes["width"] if "width" in e.attributes else ""}}}{{{e.url}}}' + url = e.url + _, ext = os.path.splitext(url) + ext = ext[1:] + if ext in ["pdf", "png", "jpeg"]: + url = i.process_image(url, ext, relative=False) + elif ext in ["svg"]: + url = i.process_image(url, "pdf", relative=False) + elif ext in ["epdf"]: + url = i.process_image(url, "pdf", relative=False) + elif ext in ["jpg"]: + url = i.process_image(url, "jpeg", relative=False) + else: + url = i.process_image(url, "pdf", relative=False) + width = "" + if "width" in e.attributes: + width = e.attributes["width"] + if e.attributes["width"][-1] == "%": + width = str(int(e.attributes["width"][:-1])/100) + "\\hsize" + width = "width " + width + return f'\\image{{{width}}}{{{url}}}' if isinstance(e, Figure): - return f'\\figure{{{tex(e.content, indent_level+1, indent_str)}}}{{{tex(e.caption, indent_level+1, indent_str)}}}\n\n' + return f'\\figure{{{tex(e.content, i, indent_level+1, indent_str)}}}{{{tex(e.caption, i, indent_level+1, indent_str)}}}\n\n' if isinstance(e, Caption): if inlinify(e) is not None: - return f'\\caption{{{tex(e.content, 0, "")}}}' + return f'\\caption{{{tex(e.content, i, 0, "")}}}' if isinstance(e, ListItem): tag = ":" @@ -134,7 +155,7 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str: if 
isinstance(e, Note): tag = "fn" if inlinify(e) is not None: - return f'\\fn{{{tex(inlinify(e), 0, "")}}}' + return f'\\fn{{{tex(inlinify(e), i, 0, "")}}}' if isinstance(e, Table): aligns = { @@ -144,16 +165,16 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str: "AlignDefault": "\\quad#\\quad\\hfil" } text = "\strut"+"&".join([aligns[col[0]] for col in e.colspec])+"\cr\n" - text += tex(e.head.content, 0, "") + text += tex(e.head.content, i, 0, "") text += "\\noalign{\\hrule}\n" - text += tex(e.content[0].content, 0, "") + text += tex(e.content[0].content, i, 0, "") text += "\\noalign{\\hrule}\n" - text += tex(e.foot.content, 0, "") + text += tex(e.foot.content, i, 0, "") return "\\vskip1em\n\\halign{"+text+"}\n\\vskip1em\n" # FIXME: Implement rowspan if isinstance(e, TableRow): - return "&".join([("\\multispan"+str(cell.colspan)+" " if cell.colspan > 1 else "")+tex(cell.content, 0, "") for cell in e.content])+"\cr\n" + return "&".join([("\\multispan"+str(cell.colspan)+" " if cell.colspan > 1 else "")+tex(cell.content, i, 0, "") for cell in e.content])+"\cr\n" if isinstance(e, RawInline): if e.format == "tex": @@ -168,13 +189,13 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str: return "" if isinstance(e, Span) or isinstance(e, Plain): - return tex(e.content, 0, "") + return tex(e.content, i, 0, "") if isinstance(e, LineItem): - return tex(e.content, 0, "") + ("\\\\\n" if e.next else "\n") + return tex(e.content, i, 0, "") + ("\\\\\n" if e.next else "\n") if isinstance(e, LineBlock): - return f'{tex(e.content, indent_level+1, indent_str)}\n' + return f'{tex(e.content, i, indent_level+1, indent_str)}\n' if isinstance(e, Group): tag = "begingroup" @@ -184,19 +205,19 @@ def tex(e, indent_level: int=0, indent_str: str="\t") -> str: close = "\\endgroup" if isinstance(e, Div): - return f'{tex(e.content, indent_level+1, indent_str)}' + return f'{tex(e.content, i, indent_level+1, indent_str)}' if isinstance(e, Doc): - return tex(e.content, 
indent_level, indent_str)+"\n\\bye" + return tex(e.content, i, indent_level, indent_str)+"\n\\bye" if isinstance(e, Inline): - return f'\\{tag}{arguments}{open}{content_head}{tex(e.content, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}{close}' + return f'\\{tag}{arguments}{open}{content_head}{tex(e.content, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}{close}' out_str = "" out_str = f"\\{tag}{arguments}{open}\n" out_str += content_head if hasattr(e, "_content"): - out_str += tex(e.content, indent_level+1, indent_str) + out_str += tex(e.content, i, indent_level+1, indent_str) if hasattr(e, "text"): out_str += e.text out_str += f"{content_foot}\n{close}\n\n"