Browse Source

Merge remote-tracking branch 'origin/master' into jk-bakalarka

jk-bakalarka
Jiří Kalvoda 8 months ago
parent
commit
62263fbe0f
  1. 3
      .gitmodules
  2. 2
      src/formatitko/command.py
  3. 1
      src/formatitko/command_env.py
  4. 2
      src/formatitko/command_util.py
  5. 62
      src/formatitko/context.py
  6. 60
      src/formatitko/formatitko.py
  7. 311
      src/formatitko/html.py
  8. 70
      src/formatitko/html_generator.py
  9. 189
      src/formatitko/images.py
  10. 1
      src/formatitko/katex-server
  11. 1
      src/formatitko/katex-server/.gitignore
  12. 1
      src/formatitko/katex-server/README.md
  13. 1
      src/formatitko/katex-server/index.js
  14. 131
      src/formatitko/katex-server/index.mjs
  15. 39
      src/formatitko/katex-server/package-lock.json
  16. 14
      src/formatitko/katex-server/package.json
  17. 30
      src/formatitko/katex.py
  18. 53
      src/formatitko/nop_processor.py
  19. 118
      src/formatitko/output_generator.py
  20. 270
      src/formatitko/tex.py
  21. 22
      src/formatitko/tex_generator.py
  22. 176
      src/formatitko/transform.py
  23. 117
      src/formatitko/transform_processor.py
  24. 2
      src/formatitko/util.py
  25. 10
      test/test-files/test-partial.md
  26. 8
      test/test-top.html
  27. 26
      test/test.md

3
.gitmodules

@ -1,3 +1,6 @@
[submodule "ucwmac"] [submodule "ucwmac"]
path = ucwmac path = ucwmac
url = git://git.ucw.cz/ucwmac.git url = git://git.ucw.cz/ucwmac.git
[submodule "src/formatitko/katex-server"]
path = src/formatitko/katex-server
url = https://gitea.ks.matfyz.cz:/KSP/formatitko-katex-server

2
src/formatitko/command.py

@ -16,7 +16,7 @@ class InlineCommand(Span, Command):
if len(content) == 1 and (isinstance(content[0], Para) or isinstance(content[0], Plain)): if len(content) == 1 and (isinstance(content[0], Para) or isinstance(content[0], Plain)):
return Span(*content[0].content) return Span(*content[0].content)
else: else:
raise InlineError(f"The command {self.attributes['c']} returned multiple Paragraphs and must be executed using `::: {{c={self.attributes['c']}}}\\n:::`.\n\n{content}") return Div(*content)
pass pass
class BlockCommand(Div, Command): class BlockCommand(Div, Command):

1
src/formatitko/command_env.py

@ -5,4 +5,5 @@ from formatitko.util import parse_string
from formatitko.context import Context from formatitko.context import Context
from formatitko.command import Command from formatitko.command import Command
from .nop_processor import NOPProcessor
from panflute import Element from panflute import Element

2
src/formatitko/command_util.py

@ -15,7 +15,7 @@ def parse_command(code: str) -> CommandCallable:
indented_code_lines = [] indented_code_lines = []
for line in code_lines: for line in code_lines:
indented_code_lines.append(("\t" if tabs else " ")+line) indented_code_lines.append(("\t" if tabs else " ")+line)
code = "def command(element: Command, context: Context) -> list[Element]:\n"+"\n".join(indented_code_lines) code = "def command(element: Command, context: Context, processor: NOPProcessor) -> list[Element]:\n"+"\n".join(indented_code_lines)
env = {**command_env.__dict__} env = {**command_env.__dict__}
exec(code, env) exec(code, env)
return env["command"] return env["command"]

62
src/formatitko/context.py

@ -3,11 +3,10 @@ from panflute import Doc, Element, Div, Span, Header
from typing import Union, Callable from typing import Union, Callable
from types import ModuleType from types import ModuleType
import os import os
import warnings
from .command import Command from .command import Command
CommandCallable = Callable[[Command, 'Context'], list[Element]] # This is here because of a wild circular import dependency between many functions and classes CommandCallable = Callable[[Command, 'Context', 'NOPProcessor'], list[Element]] # This is here because of a wild circular import dependency between many functions and classes
# This class is used to keep state while transforming the document using # This class is used to keep state while transforming the document using
# transform.py. For the context to be available to the html and TeX generators, # transform.py. For the context to be available to the html and TeX generators,
@ -31,11 +30,15 @@ def default_number_generator(e: Header, context: 'Context') -> str:
class Context: class Context:
parent: Union["Context", None] parent: Union["Context", None]
_commands: dict[str, Union[CommandCallable, None]] _commands: dict[str, Union[CommandCallable, None]]
_data: dict[str, object]
doc: Doc doc: Doc
trusted: bool trusted: bool
path: str path: str
dir: str dir: str
filename: str filename: str
root_dir: str # Absolute path to the dir of the file formátítko was called on
rel_dir: str # Relative path to the current dir from the root dir
deps: set[str]
section_counters: list[int] section_counters: list[int]
number_generator: Callable[[Header, 'Context'], str] number_generator: Callable[[Header, 'Context'], str]
@ -43,11 +46,16 @@ class Context:
def __init__(self, doc: Doc, path: str, parent: Union['Context', None]=None, trusted: bool=True): def __init__(self, doc: Doc, path: str, parent: Union['Context', None]=None, trusted: bool=True):
self.parent = parent self.parent = parent
self._commands = {} self._commands = {}
self._data = {}
self.doc = doc self.doc = doc
self.trusted = trusted self.trusted = trusted
self.path = path self.path = path
self.dir = os.path.dirname(path) if os.path.dirname(path) != "" else "." self.dir = os.path.dirname(path) if os.path.dirname(path) != "" else "."
self.filename = os.path.basename(path) self.filename = os.path.basename(path)
self.root_dir = parent.root_dir if parent else os.path.abspath(self.dir)
self.rel_dir = os.path.relpath(self.dir, self.root_dir)
self.deps = set()
self.add_dep(path)
if self.get_metadata("flags", immediate=True) is None: if self.get_metadata("flags", immediate=True) is None:
self.set_metadata("flags", {}) self.set_metadata("flags", {})
self.number_generator = default_number_generator self.number_generator = default_number_generator
@ -122,7 +130,57 @@ class Context:
else: else:
self.set_metadata(key, data) self.set_metadata(key, data)
def get_data(self, key: str, immediate: bool=False):
    """Look up a value stored under a dot-separated key.

    The key is split on "." and each part is used to descend one level
    into the nested ``self._data`` dictionaries. When any part is missing,
    the lookup is delegated to ``self.parent`` (if there is one and
    ``immediate`` is false); otherwise ``None`` is returned.
    """
    node = self._data
    for part in key.split("."):
        try:
            node = node[part]
        except KeyError:
            # Miss in this context: fall back to the parent unless the
            # caller asked for this context only.
            if self.parent and not immediate:
                return self.parent.get_data(key)
            return None
    return node
def set_data(self, key: str, value: object):
    """Store ``value`` under a dot-separated key in ``self._data``.

    Every part of the key except the last names a nested dictionary;
    missing intermediate dictionaries are created on the way down. The
    last part is the entry that receives ``value``.
    """
    *parents, leaf = key.split(".")
    node = self._data
    for part in parents:
        try:
            node = node[part]
        except KeyError:
            # Intermediate level does not exist yet: create it and descend.
            fresh = {}
            node[part] = fresh
            node = fresh
    node[leaf] = value
def unset_data(self, key: str):
    """Remove the value stored under a dot-separated key in ``self._data``.

    An empty key clears all data held by this context. Otherwise the key
    is split on "." and the final part is deleted from the dictionary
    reached by following the preceding parts.

    Raises:
        KeyError: if any part of the key does not exist.
    """
    if key == "":
        # Reset the whole store; there is no individual entry to delete.
        self._data = {}
        return
    *parents, leaf = key.split(".")
    node = self._data
    for part in parents:
        node = node[part]
    del node[leaf]
def get_deps(self) -> set[str]:
    """Return the dependency set shared by the whole context tree.

    Only the root context (the one without a parent) owns the set; every
    other context forwards the call to its parent, so the object returned
    is the root's ``deps`` set itself, not a copy.
    """
    if self.parent is None:
        return self.deps
    return self.parent.get_deps()
def add_dep(self, dep: str):
    """Record ``dep`` as a dependency, stored as an absolute path.

    The path is added to the set returned by ``self.get_deps()``.
    """
    registry = self.get_deps()
    registry.add(os.path.abspath(dep))
def add_deps(self, deps: list[str]):
    """Record several dependencies at once, each stored as an absolute path.

    All paths are converted first and then merged into the set returned by
    ``self.get_deps()``.
    """
    registry = self.get_deps()
    absolute_paths = list(map(os.path.abspath, deps))
    registry.update(absolute_paths)
def get_context_from_doc(doc: Doc) -> Union[Context, None]:
    """Return the context attached to a document, if it has one.

    The context is read from the document's only top-level element when
    that element is a ``Group``. Any other document shape yields ``None``,
    so callers must check the result before using it.
    """
    if len(doc.content) == 1 and isinstance(doc.content[0], Group):
        return doc.content[0].context
    return None
# This is a custom element which creates \begingroup \endgroup groups in TeX # This is a custom element which creates \begingroup \endgroup groups in TeX
# and also causes KaTeX math blocks to be isolated in a similar way. # and also causes KaTeX math blocks to be isolated in a similar way.

60
src/formatitko/formatitko.py

@ -7,18 +7,15 @@ import subprocess
import shutil import shutil
# Import local files # Import local files
from .transform import transform
from .util import import_md from .util import import_md
from .context import Context, BlockGroup
from .katex import KatexClient from .katex import KatexClient
from .html import html from .images import ImageProcessor, ImageProcessorNamespace
from .tex import tex from .output_generator import OutputGenerator, FormatitkoRecursiveError
from .images import ImageProcessor from .html_generator import HTMLGenerator, StandaloneHTMLGenerator
from .output_generator import OutputGenerator
from .html_generator import HTMLGenerator
from .transform_processor import TransformProcessor from .transform_processor import TransformProcessor
from .pandoc_processor import PandocProcessor from .pandoc_processor import PandocProcessor
from .tex_generator import UCWTexGenerator from .tex_generator import UCWTexGenerator
from .context import get_context_from_doc
from panflute import convert_text from panflute import convert_text
@ -30,6 +27,7 @@ def main():
parser.add_argument("-c", "--img-cache-dir", help="Directory to cache processed images and intermediate products. The program will overwrite files, whose dependencies are newer.", default="cache") parser.add_argument("-c", "--img-cache-dir", help="Directory to cache processed images and intermediate products. The program will overwrite files, whose dependencies are newer.", default="cache")
parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/") parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/")
parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.") parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.")
parser.add_argument("-s", "--output-standalone-html", help="The Standalone HTML file to write into. A full page is generated instead of just a fragment.")
parser.add_argument("-t", "--output-tex", help="The TEX file to write into.") parser.add_argument("-t", "--output-tex", help="The TEX file to write into.")
parser.add_argument("-m", "--output-md", help="The Markdown file to write into. (Uses pandoc to generate markdown)") parser.add_argument("-m", "--output-md", help="The Markdown file to write into. (Uses pandoc to generate markdown)")
parser.add_argument("-j", "--output-json", help="The JSON file to dump the pandoc-compatible AST into.") parser.add_argument("-j", "--output-json", help="The JSON file to dump the pandoc-compatible AST into.")
@ -38,6 +36,8 @@ def main():
parser.add_argument("-k", "--katex-socket", help="The KaTeX server socket filename obtained by running with `--katex-server`.") parser.add_argument("-k", "--katex-socket", help="The KaTeX server socket filename obtained by running with `--katex-server`.")
parser.add_argument("input_filename", help="The markdown file to process.", nargs="?" if "--katex-server" in sys.argv else None) parser.add_argument("input_filename", help="The markdown file to process.", nargs="?" if "--katex-server" in sys.argv else None)
parser.add_argument("--debug", action='store_true') parser.add_argument("--debug", action='store_true')
parser.add_argument("--traceback-limit", help="Traceback limit for when errors happen, defaults to 0, as it is only useful for internal debugging.", default=0)
parser.add_argument("--deps", help="File to write list of dependencies to. May depend on output formats used.")
args = parser.parse_args() args = parser.parse_args()
if args.katex_server: if args.katex_server:
@ -54,22 +54,43 @@ def main():
doc = import_md(open(args.input_filename, "r").read()) doc = import_md(open(args.input_filename, "r").read())
if args.debug: if args.debug:
OutputGenerator(sys.stdout).generate(doc) try:
OutputGenerator(sys.stdout).generate(doc)
except FormatitkoRecursiveError as e:
e.pretty_print(tracebacklimit=args.traceback_limit)
doc = TransformProcessor(args.input_filename).transform(doc) try:
doc = TransformProcessor(args.input_filename).transform(doc)
except FormatitkoRecursiveError as e:
e.pretty_print(tracebacklimit=args.traceback_limit)
# Initialize the image processor (this just keeps some basic state) # Initialize the image processor (this just keeps some basic state)
imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs) imageProcessor = ImageProcessor({"": ImageProcessorNamespace(args.img_public_dir, args.img_web_path, args.img_cache_dir, args.img_lookup_dirs, True)})
if args.output_html is not None: if args.output_html is not None:
# Initialize KaTeX client (this runs the node app and connects to a unix socket) # Initialize KaTeX client (this runs the node app and connects to a unix socket)
with KatexClient(socket=args.katex_socket) as katexClient: with KatexClient(socket=args.katex_socket) as katexClient:
with open(args.output_html, "w") as file: with open(args.output_html, "w") as file:
HTMLGenerator(file, katexClient, imageProcessor).generate(doc) try:
HTMLGenerator(file, katexClient, imageProcessor).generate(doc)
except FormatitkoRecursiveError as e:
e.pretty_print(tracebacklimit=args.traceback_limit)
if args.output_standalone_html is not None:
# Initialize KaTeX client (this runs the node app and connects to a unix socket)
with KatexClient(socket=args.katex_socket) as katexClient:
with open(args.output_standalone_html, "w") as file:
try:
StandaloneHTMLGenerator(file, katexClient, imageProcessor).generate(doc)
except FormatitkoRecursiveError as e:
e.pretty_print(tracebacklimit=args.traceback_limit)
if args.output_tex is not None: if args.output_tex is not None:
with open(args.output_tex, "w") as file: with open(args.output_tex, "w") as file:
UCWTexGenerator(file, imageProcessor).generate(doc) try:
UCWTexGenerator(file, imageProcessor).generate(doc)
except FormatitkoRecursiveError as e:
e.pretty_print(tracebacklimit=args.traceback_limit)
if args.output_md is not None: if args.output_md is not None:
with open(args.output_md, "w") as file: with open(args.output_md, "w") as file:
@ -83,7 +104,10 @@ def main():
if args.output_tex is None: if args.output_tex is None:
fd = tempfile.NamedTemporaryFile(dir=".", suffix=".tex") fd = tempfile.NamedTemporaryFile(dir=".", suffix=".tex")
with open(fd.name, "w") as file: with open(fd.name, "w") as file:
UCWTexGenerator(file, imageProcessor).generate(doc) try:
UCWTexGenerator(file, imageProcessor).generate(doc)
except FormatitkoRecursiveError as e:
e.pretty_print(tracebacklimit=args.traceback_limit)
filename = fd.name filename = fd.name
else: else:
filename = args.output_tex filename = args.output_tex
@ -91,9 +115,17 @@ def main():
subprocess.run(["pdfcsplain", "-halt-on-error", "-output-directory="+outdir.name, "-jobname=formatitko", filename], check=True) subprocess.run(["pdfcsplain", "-halt-on-error", "-output-directory="+outdir.name, "-jobname=formatitko", filename], check=True)
shutil.move(outdir.name+"/formatitko.pdf", args.output_pdf) shutil.move(outdir.name+"/formatitko.pdf", args.output_pdf)
if args.deps is not None:
with open(args.deps, "w") as file:
for dep in get_context_from_doc(doc).get_deps():
file.write(dep + "\n")
if args.debug: if args.debug:
print("-----------------------------------") print("-----------------------------------")
OutputGenerator(sys.stdout).generate(doc) try:
OutputGenerator(sys.stdout).generate(doc)
except FormatitkoRecursiveError as e:
e.pretty_print(tracebacklimit=args.traceback_limit)
if __name__ == "__main__": if __name__ == "__main__":

311
src/formatitko/html.py

@ -1,311 +0,0 @@
from panflute import *
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
import os
from typing import Union
from .whitespace import NBSP
from .elements import FQuoted
from .katex import KatexClient
from .util import inlinify
from .context import Group
from .images import ImageProcessor
import warnings
warnings.warn("The html function has been deprecated, is left only for reference and will be removed in future commits. HTML_generator should be used in its place.", DeprecationWarning)
def html(e: Union[Element, ListContainer], k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
    """Render a panflute element (or a list of elements) to an HTML string.

    Deprecated: emits a DeprecationWarning on every call.

    Args:
        e: The element or ListContainer to render. Containers are rendered
            child by child and concatenated.
        k: KaTeX client used to render Math elements and to open/close
            groups around Group elements.
        i: Image processor used to convert/publish images and to query
            image sizes for srcset generation.
        indent_level: Current nesting depth used for indenting block output.
        indent_str: String repeated ``indent_level`` times as indentation.

    Returns:
        The HTML for ``e``; an empty string for elements whose ``only``
        attribute is set to something other than "html".
    """
    warnings.warn("The html function has been deprecated, is left only for reference and will be removed in future commits. HTML_generator should be used in its place.", DeprecationWarning)

    # `only` attribute which makes transformed elements appear only in tex
    # output or html output
    if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
        return ""

    if isinstance(e, ListContainer):
        return ''.join([html(child, k, i, indent_level, indent_str) for child in e])

    # Bits from which the final element output is built at the end of this
    # function. Most elements override this by returning their own output.
    tag = e.tag.lower()
    attributes = ""
    content_foot = ""
    content_head = ""

    if isinstance(e, Str):
        # NOTE(review): both arguments look identical here, which makes this
        # replace a no-op. Presumably one of them was a non-breaking space or
        # an entity in the original file — confirm.
        return e.text.replace(" ", " ")

    # Most elements fit the general template at the end of the function, just
    # need their html tag specified.
    tags = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Caption: "figcaption",
        Para: "p",
        Header: "h"+str(e.level) if isinstance(e, Header) else "",
        LineBlock: "p",
        ListItem: "li",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
    }
    if type(e) in tags:
        tag = tags[type(e)]

    # These are also disabled in pandoc so they shouldn't appear in the AST at all.
    not_implemented = {
        Citation: True,
        Cite: True,
        Definition: True,
        DefinitionItem: True,
        DefinitionList: True
    }
    if type(e) in not_implemented:
        return f'<!-- FIXME: {type(e)}s not implemented -->'

    # Elements which can be represented by a simple string
    simple_string = {
        NBSP: "&nbsp;",
        Space: " ",
        Null: "",
        LineBreak: f"\n{indent_level*indent_str}<br>\n{indent_level*indent_str}",
        SoftBreak: f" ",
        HorizontalRule: f"{indent_level*indent_str}<hr>\n"
    }
    if type(e) in simple_string:
        return simple_string[type(e)]

    if hasattr(e, "identifier") and e.identifier != "":
        attributes += f' id="{e.identifier}"'

    if hasattr(e, "classes") and len(e.classes) != 0:
        attributes += f' class="{" ".join(e.classes)}"'

    # Attributes are only passed down manually, because we use them internally.
    # Maybe this should be a blocklist instead of an allowlist?

    # Overriding elements with their own returns
    if isinstance(e, CodeBlock):
        if len(e.classes) > 0 and (e.attributes["highlight"] == True or e.attributes["highlight"] == 'True'):
            # Syntax highlighting using pygments: the first class that names
            # a known lexer wins.
            lexer = None
            for cl in e.classes:
                try:
                    lexer = get_lexer_by_name(cl)
                except ClassNotFound:
                    continue
                break
            if lexer is None:
                print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")
                # Without a lexer there is nothing to highlight with, so fall
                # back to the plain output instead of failing on an unbound
                # name.
                return f'<pre>{e.text}</pre>'
            formatter = HtmlFormatter(style=e.attributes["style"])
            result = highlight(e.text, lexer, formatter)
            return f'{result}'
        else:
            return f'<pre>{e.text}</pre>'

    if isinstance(e, Doc):
        formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default")
        content_head = f'<style>{formatter.get_style_defs(".highlight")}</style>'

    if isinstance(e, Image):
        url = e.url

        # Attributes → image processor args
        additional_args = {}
        if "file-width" in e.attributes:
            additional_args["width"] = int(e.attributes["file-width"])
        if "file-height" in e.attributes:
            additional_args["height"] = int(e.attributes["file-height"])
        if "file-quality" in e.attributes:
            additional_args["quality"] = int(e.attributes["file-quality"])
        if "file-dpi" in e.attributes:
            additional_args["dpi"] = int(e.attributes["file-dpi"])

        # The directory of the current file, will also look for images there.
        source_dir = e.attributes["source_dir"]

        _, ext = os.path.splitext(url)
        ext = ext[1:]

        # Conversions between various formats.
        if ext in ["svg", "png", "jpeg", "gif"]:
            # Even supported elements have to be 'converted' because the
            # processing contains finding and moving them to the output
            # directory.
            url = i.process_image(url, ext, source_dir, **additional_args)
        elif ext in ["pdf", "epdf"]:
            if not "dpi" in additional_args:
                additional_args["dpi"] = 300
            url = i.process_image(url, "png", source_dir, **additional_args)
        elif ext in ["jpg"]:
            url = i.process_image(url, "jpeg", source_dir, **additional_args)
        else:
            url = i.process_image(url, "png", source_dir, **additional_args)

        # Srcset generation - multiple alternative sizes of images browsers can
        # choose from.
        _, ext = os.path.splitext(url)
        ext = ext[1:]
        srcset = []
        if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'):
            # This is inspired by @vojta001's blogPhoto shortcode he made for
            # patek.cz:
            # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
            width, height = i.get_image_size(url, [i.public_dir])
            sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)]  # (width, height, quality)
            for size in sizes:
                # Once the original fits within a size, offer the original
                # itself and stop generating larger variants.
                if width <= size[0] and height <= size[1]:
                    srcset.append((f'{i.web_path}/{url}', f'{width}w'))
                    break
                quality = size[2] if ext == "jpeg" else None
                srcset.append((f'{i.web_path}/{i.process_image(url, ext, i.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))

        url = i.web_path + "/" + url

        attributes = f'{" style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}"'

        if len(srcset) != 0:
            return f'<a href="{url}"><img src="{srcset[-1][0]}" srcset="{", ".join([" ".join(src) for src in srcset])}"{attributes}></a>'
        else:
            return f'<img src="{url}"{attributes}>'

    # See https://pandoc.org/MANUAL.html#line-blocks
    if isinstance(e, LineItem):
        return indent_level*indent_str + html(e.content, k, i) + "<br>\n"

    # Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before me))
    if isinstance(e, Note):
        content_head = "("
        content_foot = ")"
        if inlinify(e) is not None:
            return f' <note>({html(inlinify(e), k, i, 0, "")})</note>'

    if isinstance(e, FQuoted):
        # NOTE(review): the "cs" and "en" branches emit the content with no
        # quote characters around it as written — typographic quotes may have
        # been lost from these strings; confirm against the original file.
        if e.style == "cs":
            if e.quote_type == "SingleQuote":
                return f'{html(e.content, k, i, 0, "")}'
            elif e.quote_type == "DoubleQuote":
                return f'{html(e.content, k, i, 0, "")}'
        elif e.style == "en":
            if e.quote_type == "SingleQuote":
                return f'{html(e.content, k, i, 0, "")}'
            elif e.quote_type == "DoubleQuote":
                return f'{html(e.content, k, i, 0, "")}'
        else:
            if e.quote_type == "SingleQuote":
                return f'\'{html(e.content, k, i, 0, "")}\''
            elif e.quote_type == "DoubleQuote":
                return f'"{html(e.content, k, i, 0, "")}"'
            else:
                return f'"{html(e.content, k, i, 0, "")}"'

    if isinstance(e, Group):
        # Mirror the group in the KaTeX client while its content is rendered.
        k.begingroup()
        ret = html(e.content, k, i, indent_level, indent_str)
        k.endgroup()
        return ret

    if isinstance(e, Math):
        formats = {
            "DisplayMath": True,
            "InlineMath": False
        }
        return indent_level*indent_str + k.render(e.text, {"displayMode": formats[e.format]})

    if isinstance(e, RawInline):
        if e.format == "html":
            return e.text
        else:
            return ""

    if isinstance(e, RawBlock):
        if e.format == "html":
            return f'{e.text}\n'
        else:
            return ""

    # Non-overriding elements, they get generated using the template at the end
    # of this function
    if isinstance(e, Header):
        tag = "h"+str(e.level)

    if isinstance(e, Figure):
        content_foot = html(e.caption, k, i, indent_level+1, indent_str)

    if isinstance(e, Caption):
        tag = "figcaption"

    if isinstance(e, Link):
        tag = "a"
        attributes += f' href="{e.url}"'
        if e.title:
            attributes += f' title="{e.title}"'

    if isinstance(e, OrderedList):
        tag = "ol"
        if e.start and e.start != 1:
            attributes += f' start="{e.start}"'
        html_styles = {
            "Decimal": "1",
            "LowerRoman": "i",
            "UpperRoman": "I",
            "LowerAlpha": "a",
            "UpperAlpha": "A"
        }
        if e.style and e.style != "DefaultStyle":
            attributes += f' type="{html_styles[e.style]}"'
        # FIXME: Delimiter styles

    if isinstance(e, Table):
        content_head = html(e.head, k, i, indent_level+1, indent_str)
        content_foot = html(e.foot, k, i, indent_level+1, indent_str)
        # FIXME: Fancy pandoc tables, using colspec

    if isinstance(e, TableCell):
        tag = "td"
        if e.colspan != 1:
            attributes += f' colspan="{e.colspan}"'
        if e.rowspan != 1:
            attributes += f' rowspan="{e.rowspan}"'
        aligns = {
            "AlignLeft": "left",
            "AlignRight": "right",
            "AlignCenter": "center"
        }
        if e.alignment and e.alignment != "AlignDefault":
            attributes += f' style="text-align: {aligns[e.alignment]}"'

    # The default which all non-overriding elements get generated by. This
    # includes elements, which were not explicitly mentioned in this function,
    # e. g. Strong

    if isinstance(e, Inline):
        return f'<{tag}{attributes}>{content_head}{html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}</{tag}>'

    out_str = ""
    # Plain elements get no wrapping tag of their own, only their content.
    if not isinstance(e, Plain):
        out_str += f"{indent_level*indent_str}<{tag}{attributes}>\n"
    out_str += content_head
    if hasattr(e, "_content"):
        # Inline content does not indent itself, so indent its first line here.
        if len(e.content) > 0 and isinstance(e.content[0], Inline):
            out_str += (indent_level+1)*indent_str
        out_str += html(e.content, k, i, indent_level+1, indent_str)
    if hasattr(e, "text"):
        out_str += e.text
    out_str += f"{content_foot}\n"
    if not isinstance(e, Plain):
        out_str += f"{indent_level*indent_str}</{tag}>\n"

    return out_str

70
src/formatitko/html_generator.py

@ -17,9 +17,10 @@ from .whitespace import NBSP
from .context import Group, BlockGroup, InlineGroup from .context import Group, BlockGroup, InlineGroup
from .output_generator import OutputGenerator from .output_generator import OutputGenerator
from .katex import KatexClient from .katex import KatexClient
from .images import ImageProcessor from .images import ImageProcessor, ImageProcessorNamespaceSearcher
from .util import inlinify from .util import inlinify
class HTMLGenerator(OutputGenerator): class HTMLGenerator(OutputGenerator):
imageProcessor: ImageProcessor imageProcessor: ImageProcessor
katexClient: KatexClient katexClient: KatexClient
@ -136,9 +137,15 @@ class HTMLGenerator(OutputGenerator):
url = e.url url = e.url
additional_args = self.get_image_processor_args(e.attributes) additional_args = self.get_image_processor_args(e.attributes)
additional_args["context"] = self.context
# The directory of the current file, will also look for images there. # The directory of the current file relative to the current working directory
source_dir = self.context.dir source_dir = self.context.dir
# The directory of the current file relative to the md file we were called on
rel_dir = self.context.rel_dir
searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
url = self.imageProcessor.get_path_without_namespace(url)
_, ext = os.path.splitext(url) _, ext = os.path.splitext(url)
ext = ext[1:] ext = ext[1:]
@ -148,16 +155,16 @@ class HTMLGenerator(OutputGenerator):
# Even supported elements have to be 'converted' because the # Even supported elements have to be 'converted' because the
# processing contains finding and moving them to the output # processing contains finding and moving them to the output
# directory. # directory.
url = self.imageProcessor.process_image(url, ext, source_dir, **additional_args) url = self.imageProcessor.process_image(url, ext, searcher, **additional_args)
elif ext in ["pdf", "epdf","asy"]: elif ext in ["pdf", "epdf","asy"]:
# Only relevant for when these were PNGs, leaving this here for future reference. # Only relevant for when these were PNGs, leaving this here for future reference.
# if not "dpi" in additional_args: # if not "dpi" in additional_args:
# additional_args["dpi"] = 300 # additional_args["dpi"] = 300
url = self.imageProcessor.process_image(url, "svg", source_dir, **additional_args) url = self.imageProcessor.process_image(url, "svg", searcher, **additional_args)
elif ext in ["jpg"]: elif ext in ["jpg"]:
url = self.imageProcessor.process_image(url, "jpeg", source_dir, **additional_args) url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args)
else: else:
url = self.imageProcessor.process_image(url, "png", source_dir, **additional_args) url = self.imageProcessor.process_image(url, "png", searcher, **additional_args)
# Srcset generation - multiple alternative sizes of images browsers can # Srcset generation - multiple alternative sizes of images browsers can
# choose from. # choose from.
@ -168,23 +175,25 @@ class HTMLGenerator(OutputGenerator):
# This is inspired by @vojta001's blogPhoto shortcode he made for # This is inspired by @vojta001's blogPhoto shortcode he made for
# patek.cz: # patek.cz:
# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir]) width, height = self.imageProcessor.get_image_size(searcher.find_image_in_dir(url, searcher.get_cache_dir()))
sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality) sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality)
for size in sizes: for size in sizes:
if width <= size[0] and height <= size[1]: if width <= size[0] and height <= size[1]:
srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w')) srcset.append((f'{searcher.get_web_path()}/{url}', f'{width}w'))
break break
quality = size[2] if ext == "jpeg" else None quality = size[2] if ext == "jpeg" else None
cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=size[0], height=size[1], quality=quality) cache_img = self.imageProcessor.process_image(url, ext, searcher.get_cache_searcher(), width=size[0], height=size[1], quality=quality)
self.imageProcessor.publish_image(cache_img) searcher.publish_image(cache_img)
srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{size[0]}w')) srcset.append((f'{searcher.get_web_path()}/{cache_img}', f'{size[0]}w'))
self.imageProcessor.publish_image(url) searcher.publish_image(url)
url = self.imageProcessor.web_path + "/" + url url = searcher.get_web_path() + "/" + url
attributes = self.common_attributes(e) attributes = self.common_attributes(e)
if "width" in e.attributes: if "width" in e.attributes:
attributes["width"] = e.attributes["width"] attributes["width"] = e.attributes["width"]
if "height" in e.attributes:
attributes["height"] = e.attributes["height"]
if e.title: if e.title:
attributes["alt"] = e.title attributes["alt"] = e.title
@ -199,6 +208,10 @@ class HTMLGenerator(OutputGenerator):
else: else:
attributes["src"] = url attributes["src"] = url
if e.attributes["no-img-link"]:
self.write(self.single_tag("img", attributes))
return
img = RawInline(self.single_tag("img", attributes)) img = RawInline(self.single_tag("img", attributes))
link = Link(img, url=url) link = Link(img, url=url)
@ -310,3 +323,34 @@ class HTMLGenerator(OutputGenerator):
def generate_DefinitionList(self, e: DefinitionList): def generate_DefinitionList(self, e: DefinitionList):
self.writeln("<!-- FIXME: DefinitionLists not implemented -->") self.writeln("<!-- FIXME: DefinitionLists not implemented -->")
class StandaloneHTMLGenerator(HTMLGenerator):
    """HTML generator that wraps the document output in a full HTML page.

    The page gets a doctype, an ``<html>`` element with the document's
    ``lang`` metadata, a ``<head>`` (charset, viewport, KaTeX stylesheet,
    optional title and extra head content from metadata) and a ``<body>``
    containing whatever the parent generator produces for the document.
    """

    def generate_Doc(self, e: Doc):
        """Write the page skeleton and delegate the body to the parent class."""
        self.writeraw("<!DOCTYPE html>")
        self.writeln(self.start_tag("html", attributes={"lang": e.get_metadata("lang", None, True)}))
        self.writeln(self.start_tag("head"))
        self.indent_more()
        self.writeln(self.single_tag("meta", attributes={"charset": "utf-8"}))
        # The viewport is declared through the name/content attribute pair;
        # a bare `viewport` attribute is ignored by browsers.
        self.writeln(self.single_tag("meta", attributes={"name": "viewport", "content": "width=device-width, initial-scale=1.0"}))
        self.writeln(self.single_tag("link", attributes={"rel": "stylesheet", "href": "https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css", "integrity":"sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0", "crossorigin":"anonymous"}))
        if "title" in e.metadata:
            self.write(self.start_tag("title"))
            self.generate(e.metadata["title"])
            self.write(self.end_tag("title"))
            self.endln()
        if "html-head-includes" in e.metadata:
            self.generate(e.metadata["html-head-includes"])
        self.indent_less()
        self.writeln(self.end_tag("head"))
        self.writeln(self.start_tag("body"))
        self.indent_more()
        super().generate_Doc(e)
        self.indent_less()
        self.writeln(self.end_tag("body"))
        self.writeln(self.end_tag("html"))

189
src/formatitko/images.py

@ -4,46 +4,171 @@ import shutil
import subprocess import subprocess
from PIL import Image from PIL import Image
from .context import Context
class FileInWrongDirError(Exception): class FileInWrongDirError(Exception):
pass pass
class ConversionProgramError(Exception): class ConversionProgramError(Exception):
pass pass
class InkscapeError(ConversionProgramError): class InkscapeError(ConversionProgramError):
pass pass
class ImageMagickError(ConversionProgramError): class ImageMagickError(ConversionProgramError):
pass pass
class AsyError(ConversionProgramError): class AsyError(ConversionProgramError):
pass pass
class ImageProcessorNamespace:
class ImageProcessor:
public_dir: str public_dir: str
cache_dir: str cache_dir: str
lookup_dirs: list[str] lookup_dirs: list[str]
web_path: str web_path: str
include_src: bool
def __init__(self, public_dir: str, web_path: str, cache_dir: str, *lookup_dirs: list[str]): def __init__(self, public_dir: str, web_path: str, cache_dir: str, lookup_dirs: list[str], include_src: bool):
self.public_dir = public_dir self.public_dir = public_dir
self.cache_dir = cache_dir self.cache_dir = cache_dir
self.lookup_dirs = lookup_dirs self.lookup_dirs = lookup_dirs
self.web_path = web_path if web_path[-1] != "/" else web_path[:-1] self.web_path = web_path if web_path[-1] != "/" else web_path[:-1]
if not os.path.exists(self.public_dir): self.include_src = include_src
os.mkdir(self.public_dir)
class ImageProcessorSearcher:
    """Base class that knows where images are looked up, cached and published.

    The getters here return empty defaults; subclasses override them to
    supply real directories.
    """

    def get_lookup_dirs(self) -> list[str]:
        """Return the directories searched by ``find_image``, in order."""
        return []

    def get_cache_dir(self) -> str:
        """Return the directory holding processed (cached) images."""
        return ""

    def get_public_dir(self) -> str:
        """Return the directory into which images are published."""
        return ""

    def get_web_path(self) -> str:
        """Return the web path associated with the public directory."""
        return ""

    def find_image_in_dir(self, input_filename: str, dir: str) -> Union[str, None]:
        """Return ``dir/input_filename`` if it is an existing file, else ``None``."""
        candidate = dir + "/" + input_filename
        if os.path.isfile(candidate):
            return candidate
        return None

    def find_image(self, input_filename: str) -> Union[str, None]:
        """Return the first match for ``input_filename`` in the lookup dirs, or ``None``."""
        for dir in self.get_lookup_dirs():
            image = self.find_image_in_dir(input_filename, dir)
            if image:
                return image
        return None

    def publish_image(self, target_name, relative: bool=True) -> str:
        """Make the cached image ``target_name`` available in the public dir.

        The file is hard-linked from the cache dir. An existing public file
        is replaced only when the cached one is newer. When the two
        directories are on different devices the file is copied instead.

        Returns ``target_name`` when ``relative`` is true, otherwise the full
        path of the published file.

        Raises ``FileNotFoundError`` if the image is not in the cache dir and
        re-raises any ``OSError`` other than a cross-device link failure.
        """
        import errno

        cache_path = self.get_cache_dir() + "/" + target_name
        if not os.path.isfile(cache_path):
            raise FileNotFoundError(f'Image {target_name} not cached')

        target_path = self.get_public_dir() + "/" + target_name
        try:
            if os.path.exists(target_path):
                if os.path.getmtime(cache_path) > os.path.getmtime(target_path):
                    os.remove(target_path)
                    os.link(cache_path, target_path)
            else:
                os.link(cache_path, target_path)
        except OSError as e:
            # Invalid cross-device link: cache and public dirs are on
            # different devices, so hardlinking is impossible; copy instead.
            if e.errno != errno.EXDEV:
                raise
            shutil.copyfile(cache_path, target_path)
        return target_name if relative else target_path
class ImageProcessorCacheSearcher(ImageProcessorSearcher):
cache_dir: str
def __init__(self, cache_dir: str):
self.cache_dir = cache_dir
if not os.path.exists(self.cache_dir): if not os.path.exists(self.cache_dir):
os.mkdir(self.cache_dir) os.makedirs(self.cache_dir, exist_ok=True)
def get_lookup_dirs(self) -> list[str]:
return [self.cache_dir]
def get_cache_dir(self) -> str:
return self.cache_dir
def get_public_dir(self) -> str:
return ""
def process_image(self, input_filename: str, format: str, source_dir: str, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str: def get_web_path(self) -> str:
return ""
def publish_image(self, target_name, relative: bool=True) -> str:
raise NotImplementedError();
class ImageProcessorNamespaceSearcher(ImageProcessorSearcher):
    """Searcher bound to one namespace and one source location.

    The namespace's cache, public and web paths may contain the placeholder
    ``$dir``; it is replaced with ``rel_dir`` before the path is used.
    """

    namespace: ImageProcessorNamespace
    rel_dir: str
    source_dir: str

    def __init__(self, namespace: ImageProcessorNamespace, rel_dir: str, source_dir: str):
        self.namespace = namespace
        self.rel_dir = rel_dir
        self.source_dir = source_dir

    def get_lookup_dirs(self) -> list[str]:
        """Return the namespace lookup dirs, plus the source dir if the namespace includes it."""
        extra_dirs = []
        if self.namespace.include_src:
            extra_dirs.append(self.source_dir)
        return self.namespace.lookup_dirs + extra_dirs

    def transform_path(self, path: str) -> str:
        """Substitute ``rel_dir`` for every ``$dir`` in ``path``."""
        return path.replace("$dir", self.rel_dir)

    def _resolve_and_create(self, template: str) -> str:
        """Expand ``template`` and create the resulting directory if it is missing."""
        resolved = self.transform_path(template)
        if not os.path.exists(resolved):
            os.makedirs(resolved, exist_ok=True)
        return resolved

    def get_cache_dir(self) -> str:
        """Return the expanded cache dir, creating it when it does not exist."""
        return self._resolve_and_create(self.namespace.cache_dir)

    def get_public_dir(self) -> str:
        """Return the expanded public dir, creating it when it does not exist."""
        return self._resolve_and_create(self.namespace.public_dir)

    def get_web_path(self) -> str:
        """Return the namespace web path with ``$dir`` expanded."""
        return self.transform_path(self.namespace.web_path)

    def get_cache_searcher(self) -> ImageProcessorCacheSearcher:
        """Return a searcher that looks only into this searcher's cache dir."""
        return ImageProcessorCacheSearcher(self.get_cache_dir())
class ImageProcessor:
namespaces: dict[str, ImageProcessorNamespace]
def __init__(self, namespaces: dict[str, ImageProcessorNamespace]):
self.namespaces = namespaces
def get_namespace_by_path(self, path: str) -> ImageProcessorNamespace:
return self.namespaces[path.split(":")[0] if ":" in path else ""]
def get_path_without_namespace(self, path: str) -> str:
if len(path.split(":")) <= 1:
return path
return ":".join(path.split(":")[1:])
def get_searcher_by_path(self, path: str, rel_dir: str, source_dir: str) -> ImageProcessorNamespaceSearcher:
return ImageProcessorNamespaceSearcher(self.get_namespace_by_path(path), rel_dir, source_dir)
def process_image(self, input_filename: str, format: str, searcher: ImageProcessorSearcher, context: Context=None, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str:
name = os.path.basename(input_filename) name = os.path.basename(input_filename)
base, ext = os.path.splitext(name) base, ext = os.path.splitext(name)
ext = ext[1:] ext = ext[1:]
full_path = self.find_image(input_filename, [source_dir]) full_path = searcher.find_image(input_filename)
if full_path is None: if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found in {self.lookup_dirs} or {source_dir}.') raise FileNotFoundError(f'Image {input_filename} not found in {searcher.get_lookup_dirs()}.')
if format == "jpg": if format == "jpg":
format = "jpeg" format = "jpeg"
@ -51,7 +176,7 @@ class ImageProcessor:
# Locate all dependencies # Locate all dependencies
deps_full = [full_path] deps_full = [full_path]
for dep in deps: for dep in deps:
dep_full_path = self.find_image(dep, [source_dir]) dep_full_path = searcher.find_image(dep)
if dep_full_path is None: if dep_full_path is None:
raise FileNotFoundError(f'Image dependency {dep} not found.') raise FileNotFoundError(f'Image dependency {dep} not found.')
deps_full.append(dep_full_path) deps_full.append(dep_full_path)
@ -65,7 +190,7 @@ class ImageProcessor:
if quality is not None: if quality is not None:
suffix += f'_q{quality}' suffix += f'_q{quality}'
target_name = base+suffix+"."+format target_name = base+suffix+"."+format
target_path = self.cache_dir + "/" + target_name target_path = searcher.get_cache_dir() + "/" + target_name
# Only regenerate if the file doesn't already exist and no dependencies are newer # Only regenerate if the file doesn't already exist and no dependencies are newer
if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full): if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full):
@ -80,13 +205,13 @@ class ImageProcessor:
# Try to find the converted filename in lookup_dirs, if you find # Try to find the converted filename in lookup_dirs, if you find
# it, don't convert, just copy. # it, don't convert, just copy.
elif self.find_image(target_name, [source_dir]) is not None and not self.is_outdated(self.find_image(target_name, [source_dir]), deps): elif searcher.find_image(target_name) is not None and not self.is_outdated(searcher.find_image(target_name), deps):
shutil.copyfile(self.find_image(target_name, [source_dir]), target_path) shutil.copyfile(searcher.find_image(target_name), target_path)
# Process asymptote # Process asymptote
elif ext == "asy": elif ext == "asy":
# Collect dependencies # Collect dependencies
deps_dir = self.cache_dir + "/" + name + "_deps" deps_dir = searcher.get_cache_dir() + "/" + name + "_deps"
if not os.path.isdir(deps_dir): if not os.path.isdir(deps_dir):
os.mkdir(deps_dir) os.mkdir(deps_dir)
for dep_full in deps_full: for dep_full in deps_full:
@ -96,7 +221,7 @@ class ImageProcessor:
dpi_arg = ['-render', str(dpi/72)] if dpi is not None else [] dpi_arg = ['-render', str(dpi/72)] if dpi is not None else []
if subprocess.run(['asy', name, '-o', target_name, '-f', format, *dpi_arg], cwd=deps_dir).returncode != 0: if subprocess.run(['asy', name, '-o', target_name, '-f', format, *dpi_arg], cwd=deps_dir).returncode != 0:
raise AsyError(f"Could not convert '{full_path}' to '{format}'") raise AsyError(f"Could not convert '{full_path}' to '{format}'")
shutil.move(deps_dir + "/" + target_name, self.cache_dir + "/" + target_name) shutil.move(deps_dir + "/" + target_name, searcher.get_cache_dir() + "/" + target_name)
# Convert SVGs using inkscape # Convert SVGs using inkscape
elif ext == "svg": elif ext == "svg":
@ -114,6 +239,8 @@ class ImageProcessor:
if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0: if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0:
raise ImageMagickError(f"Could not convert '{full_path}' to '{format}'") raise ImageMagickError(f"Could not convert '{full_path}' to '{format}'")
if context is not None:
context.add_deps(deps_full)
return target_name return target_name
def is_outdated(self, target: str, deps: list[str]): def is_outdated(self, target: str, deps: list[str]):
@ -124,37 +251,7 @@ class ImageProcessor:
return True return True
return False return False
def publish_image(self, target_name, relative: bool=True) -> str: def get_image_size(self, full_path: str) -> tuple[int, int]:
cache_path = self.cache_dir + "/" + target_name
if not os.path.isfile(cache_path):
raise FileNotFoundError(f'Image {target_name} not cached')
target_path = self.public_dir + "/" + target_name
try:
if os.path.exists(target_path):
if os.path.getmtime(cache_path) > os.path.getmtime(target_path):
os.remove(target_path)
os.link(cache_path, target_path)
else:
os.link(cache_path, target_path)
except OSError as e:
if e.errno == 18: # Invalid cross-device link: cache and public dirs are on different devices, don't hardlink, copy
shutil.copyfile(cache_path, target_path)
else:
raise e
return target_name if relative else target_path
def get_image_size(self, input_filename: str, additional_dirs: list[str]=[]) -> tuple[int, int]:
full_path = self.find_image(input_filename, additional_dirs)
if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.')
# Getting image size using ImageMagick is slow. VERY # Getting image size using ImageMagick is slow. VERY
return Image.open(full_path).size return Image.open(full_path).size
def find_image(self, input_filename: str, additional_dirs: list[str]=[]) -> Union[str, None]:
for dir in [*self.lookup_dirs, *additional_dirs]:
if os.path.isfile(dir + "/" + input_filename):
return dir + "/" + input_filename

1
src/formatitko/katex-server

@ -0,0 +1 @@
Subproject commit 953b44e942282375ac369af233c123b28146713e

1
src/formatitko/katex-server/.gitignore

@ -1 +0,0 @@
node_modules

1
src/formatitko/katex-server/README.md

@ -1 +0,0 @@
This was made by Standa Lukeš @exyi

1
src/formatitko/katex-server/index.js

@ -1 +0,0 @@
console.log(require('katex').renderToString('\\frac{2a}{b}'))

131
src/formatitko/katex-server/index.mjs

@ -1,131 +0,0 @@
// KaTeX rendering server
// Listens on unix socket, path is provided as first argument
// Expects JSON lines, each line is a query with the following schema:
// {
// formulas: [
// {
// tex: string,
// options?: object
// }
// ],
// options?: object
// }
// see https://katex.org/docs/options.html for list of available options
// If the formulas[].options field is used, the global options field is ignored.
// For each line, returns one JSON line with the following schema:
// {
// results: [
// { html?: string } | { error?: string }
// ]
// } | { error?: string }
// If one formula is invalid, the error in results is used
// If the entire query is invalid (couldn't parse JSON, for example), the outer error field is used
import katex from 'katex'
import net from 'net'
import * as readline from 'readline'
const myArgs = process.argv.slice(2)
const unixSocketPath = myArgs[0]
if (!unixSocketPath) {
console.error('you must specify socket path')
process.exit(1)
}
// The server listens on the Unix socket whose path was given as the first argument
var unixServer = net.createServer(handleClient);
unixServer.listen(unixSocketPath);
console.log("OK")
/**
 * Close the Unix socket server and terminate the process with exit code 0.
 *
 * @param signal argument supplied by the process event that triggered the
 *   handler; it is not used in the body.
 */
function handleExit(signal) {
    // unixServer.emit('close')
    // Stop the server; the completion callback does nothing.
    unixServer.close(function () {
    });
    // Runs immediately after close() is requested, without waiting for its callback.
    process.exit(0); // put this into the callback to avoid closing open connections
}
process.on('SIGINT', handleExit);
process.on('SIGQUIT', handleExit);
process.on('SIGTERM', handleExit);
process.on('exit', handleExit);
const defaultOptions = {}
/**
 * Promise wrapper around `socket.write`.
 *
 * Resolves once the write callback fires without an error and rejects with
 * the error otherwise.
 *
 * @param {net.Socket} socket
 * @returns {Promise<void>}
 * */
function socketWrite(socket, data) {
    return new Promise((resolve, reject) => {
        const onWritten = (err) => {
            if (err) {
                reject(err)
                return
            }
            resolve()
        }
        socket.write(data, onWritten)
    })
}
/**
 * Serve one client connection: read it line by line and answer every query
 * line with one JSON line.
 *
 * The lines `begingroup`, `endgroup` and `init` are control commands for the
 * macro stack and produce no reply. Any other line is parsed as a JSON query;
 * a failure to handle it is reported as `{ error }`.
 *
 * @param {net.Socket} client
 * */
async function handleClient(client) {
    const rl = readline.createInterface({ input: client })

    /* Added by GS: A stack of katex's `macros` objects, each group inherits
     * the one from the parent group and can add its own stuff without
     * affecting the parent.
     */
    let macroStack = [{}]

    for await (const line of rl) {
        try {
            // The custom commands for pushing and popping the macro stack.
            if (line === "begingroup") {
                // Copy the current state of macros and push it onto the stack.
                macroStack.push({...macroStack.slice(-1)[0]})
                continue
            } else if (line === "endgroup") {
                // Keep the root frame: an unbalanced `endgroup` must not leave
                // the stack empty, otherwise later formulas have no macro
                // object to render with.
                if (macroStack.length > 1) {
                    macroStack.pop()
                }
                continue
            } else if (line === "init") {
                macroStack = [{}]
                continue
            }

            const query = JSON.parse(line)
            const results = []
            for (const input of query.formulas) {
                const options = input.options ?? query.options ?? defaultOptions
                const currentMacros = macroStack[macroStack.length - 1]

                // Add macros from the macros option. They have to be written
                // into the top frame itself; `macroStack.slice(-1)` is a fresh
                // array, so assigning to it would discard them.
                if (options.macros) {
                    for (const macro of Object.keys(options.macros)) {
                        currentMacros[macro] = options.macros[macro]
                    }
                }
                options.macros = currentMacros

                // Enforce globalGroup option, katex then saves created macros
                // into the options.macros object.
                options.globalGroup = true

                try {
                    const html = katex.renderToString(input.tex, options)
                    results.push({ html })
                } catch (e) {
                    // One invalid formula does not fail the whole query.
                    results.push({ error: String(e) })
                }
            }
            await socketWrite(client, JSON.stringify({ results }, null, query.debug ? ' ' : undefined))
            await socketWrite(client, '\n')
        } catch (e) {
            // The whole query is unusable (for example invalid JSON).
            console.error(e)
            await socketWrite(client, JSON.stringify({ error: String(e) }))
            await socketWrite(client, '\n')
        }
    }
}

39
src/formatitko/katex-server/package-lock.json

@ -1,39 +0,0 @@
{
"name": "ksp-katex-server",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "ksp-katex-server",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"katex": "^0.16.3"
}
},
"node_modules/commander": {
"version": "8.3.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz",
"integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==",
"engines": {
"node": ">= 12"
}
},
"node_modules/katex": {
"version": "0.16.3",
"resolved": "https://registry.npmjs.org/katex/-/katex-0.16.3.tgz",
"integrity": "sha512-3EykQddareoRmbtNiNEDgl3IGjryyrp2eg/25fHDEnlHymIDi33bptkMv6K4EOC2LZCybLW/ZkEo6Le+EM9pmA==",
"funding": [
"https://opencollective.com/katex",
"https://github.com/sponsors/katex"
],
"dependencies": {
"commander": "^8.0.0"
},
"bin": {
"katex": "cli.js"
}
}
}
}

14
src/formatitko/katex-server/package.json

@ -1,14 +0,0 @@
{
"name": "ksp-katex-server",
"version": "1.0.0",
"description": "",
"main": "index.mjs",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"katex": "^0.16.3"
}
}

30
src/formatitko/katex.py

@ -3,6 +3,7 @@ import subprocess
import tempfile import tempfile
import json import json
import os import os
import shutil
class KatexError(Exception): class KatexError(Exception):
pass pass
@ -20,8 +21,10 @@ class KatexClient:
_socket_file: str _socket_file: str
_temp_dir: tempfile.TemporaryDirectory[str] _temp_dir: tempfile.TemporaryDirectory[str]
_connected: bool _connected: bool
_katex_server_path: str
def __init__(self, socket: str=None, connect: bool=True): def __init__(self, socket: str=None, connect: bool=True, katex_server_path: str=None):
self._katex_server_path = katex_server_path
if socket is not None: if socket is not None:
self._socket_file = socket self._socket_file = socket
else: else:
@ -38,20 +41,21 @@ class KatexClient:
self._temp_dir = tempfile.TemporaryDirectory(prefix='formatitko') self._temp_dir = tempfile.TemporaryDirectory(prefix='formatitko')
self._socket_file = self._temp_dir.name + "/katex-socket" self._socket_file = self._temp_dir.name + "/katex-socket"
srcdir = os.path.dirname(os.path.realpath(__file__)) if self._katex_server_path is None:
srcdir = os.path.dirname(os.path.realpath(__file__))
# Test if `node_modules` directory exists and if not, run `npm install` # Test if `node_modules` directory exists and if not, run `npm install`
if not os.path.isdir(srcdir + "/katex-server/node_modules"): if not os.path.isdir(srcdir + "/katex-server/node_modules"):
print("Installing node dependencies for the first time...") print("Installing node dependencies for the first time...")
try: npm = shutil.which("npm") or shutil.which("yarnpkg")
subprocess.run(["npm", "install"], cwd=srcdir+"/katex-server", check=True) if npm is None:
except subprocess.CalledProcessError as e:
if e.returncode == 127:
raise NPMNotFoundError("npm not found. Node.js is required to use KaTeX.") raise NPMNotFoundError("npm not found. Node.js is required to use KaTeX.")
else: subprocess.run([npm, "install"], cwd=srcdir+"/katex-server", check=True)
raise e
self._katex_server_path = srcdir + "/katex-server/index.mjs"
self._server_process = subprocess.Popen(["node", srcdir + "/katex-server/index.mjs", self._socket_file], stdout=subprocess.PIPE) self._server_process = subprocess.Popen(["node", self._katex_server_path, self._socket_file], stdout=subprocess.PIPE)
ok = self._server_process.stdout.readline() ok = self._server_process.stdout.readline()
if ok != b"OK\n": if ok != b"OK\n":
@ -79,7 +83,7 @@ class KatexClient:
if "error" in response: if "error" in response:
raise KatexServerError(response["error"]) raise KatexServerError(response["error"])
if "error" in response["results"][0]: if "error" in response["results"][0]:
raise KatexError(response["results"][0]["error"]) raise KatexError(response["results"][0]["error"] + " in $" + tex + "$")
else: else:
return response["results"][0]["html"] return response["results"][0]["html"]

53
src/formatitko/nop_processor.py

@ -7,15 +7,20 @@ from typing import Union, Callable
from .whitespace import NBSP from .whitespace import NBSP
from .elements import FQuoted from .elements import FQuoted
from .context import Group, InlineGroup, BlockGroup from .context import Group, InlineGroup, BlockGroup, Context
from .whitespace import Whitespace from .whitespace import Whitespace
from .command import BlockCommand, InlineCommand, CodeCommand, Command from .command import BlockCommand, InlineCommand, CodeCommand, Command
from .output_generator import FormatitkoRecursiveError
ELCl = Union[Element, ListContainer, list[Union[Element, ListContainer]]] ELCl = Union[Element, ListContainer, list[Union[Element, ListContainer]]]
class DoubleDocError(Exception):
"TransformProcessor should only ever see a single Doc."
pass
class NOPProcessor: class NOPProcessor:
TYPE_DICT: dict[type, Callable] TYPE_DICT: dict[type, Callable]
context: Union[Context, None] = None
class UnknownElementError(Exception): class UnknownElementError(Exception):
f"An unknown Element has been passed to the NOPProcessor, probably because panflute introduced a new one." f"An unknown Element has been passed to the NOPProcessor, probably because panflute introduced a new one."
@ -96,32 +101,45 @@ class NOPProcessor:
return [] return []
def transform(self, e: ELCl) -> ELCl: def transform(self, e: ELCl) -> ELCl:
if isinstance(e, list): try:
return self.transform_list(e) if isinstance(e, list):
elif isinstance(e, ListContainer): return self.transform_list(e)
return self.transform_ListContainer(e) elif isinstance(e, ListContainer):
return self.transform_ListContainer(e)
for transformer in self.get_pretransformers(): for transformer in self.get_pretransformers():
e = transformer(e) e = transformer(e)
try: try:
e = self.TYPE_DICT[type(e)](e) method = self.TYPE_DICT[type(e)]
except KeyError: except KeyError:
raise self.UnknownElementError(type(e)) raise self.UnknownElementError(type(e))
for transformer in self.get_posttransformers(): e = method(e)
e = transformer(e)
return e for transformer in self.get_posttransformers():
e = transformer(e)
return e
except FormatitkoRecursiveError as err:
if not isinstance(e, ListContainer):
err.add_element(e)
raise err
except Exception as err:
raise FormatitkoRecursiveError(e, self.context) from err
def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]: def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]:
for i in range(len(e)): i = 0
while i < len(e): # The length of the list can change mid-transformation, so we need to check the length each time
e[i] = self.transform(e[i]) e[i] = self.transform(e[i])
i-=-1
return e return e
def transform_ListContainer(self, e: ListContainer) -> ListContainer: def transform_ListContainer(self, e: ListContainer) -> ListContainer:
for i in range(len(e)): i = 0
while i < len(e): # The length of the list can change mid-transformation, so we need to check the length each time
e[i] = self.transform(e[i]) e[i] = self.transform(e[i])
i-=-1
return e return e
@ -293,6 +311,9 @@ class NOPProcessor:
return e return e
def transform_Doc(self, e: Doc) -> Doc: def transform_Doc(self, e: Doc) -> Doc:
if self.context is not None:
raise DoubleDocError()
self.context = Context(e, self.root_file_path)
e.content = self.transform(e.content) e.content = self.transform(e.content)
return e return e

118
src/formatitko/output_generator.py

@ -3,18 +3,56 @@ from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quote
from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead
from panflute import TableRow, TableCell, Caption, Doc from panflute import TableRow, TableCell, Caption, Doc
from panflute import MetaValue from panflute import MetaValue
from panflute import stringify
from typing import Union, Callable from typing import Union, Callable
from .whitespace import NBSP from .whitespace import NBSP
from .elements import FQuoted from .elements import FQuoted
from .context import Group, InlineGroup, BlockGroup, Context from .context import Group, InlineGroup, BlockGroup, Context
import re
import sys
class UnknownElementError(Exception): class UnknownElementError(Exception):
"An unknown Element has been passed to the OutputGenerator, probably because panflute introduced a new one." "An unknown Element has been passed to the OutputGenerator, probably because panflute introduced a new one."
pass pass
class FormatitkoRecursiveError(Exception):
    "A generic exception which wraps other exceptions and adds element-based traceback"
    # Elements collected while the error propagates; index 0 is the element
    # the error was created with, later entries are added via add_element().
    elements: list[Union[Element, ListContainer, list[Union[Element, ListContainer]]]]
    # Context that was active when the error was created.
    context: Context

    def __init__(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]], context: Context, *args):
        """Start the element trace with ``e`` and remember ``context``.

        Extra positional arguments become the exception's ``args``.
        """
        self.elements = [e]
        self.context = context
        # Unpack, so that ``args`` holds the given values instead of one
        # nested tuple (which made ``str(err)`` read "()" for no arguments).
        super().__init__(*args)

    def add_element(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]):
        """Append ``e`` to the element trace."""
        self.elements.append(e)

    def pretty_print(self, tracebacklimit: int=0):
        """Print the element trace to stderr and re-raise the wrapped exception.

        Sets ``sys.tracebacklimit`` to ``tracebacklimit`` before raising. The
        exception raised is ``__cause__``; if there is none, this exception
        itself is raised.
        """
        def eprint(*args, **kwargs):
            print(*args, file=sys.stderr, **kwargs)

        def print_filename_recursive(context: Context):
            return context.path +\
                ((" (included from " + print_filename_recursive(context.parent) + ")") if context.parent else "")

        eprint(f"Error occured in file {print_filename_recursive(self.context)} in ", end="")
        line = None
        # Walk from the last added element down to index 1; index 0 is
        # printed separately below.
        for i in range(len(self.elements)-1, 0, -1):
            # Remember the first element on the way whose content starts with an Inline.
            if hasattr(self.elements[i], "content") and len(self.elements[i].content) > 0 and isinstance(self.elements[i].content[0], Inline) and line is None:
                line = self.elements[i]
            eprint(type(self.elements[i]).__name__ + "[" + (str(self.elements[i-1].index) if isinstance(self.elements[i-1].index, int) else "") + "]", end=": ")
        if line:
            eprint()
            eprint('on line: "' + stringify(line).strip() + '"', end="")
        eprint()
        eprint("in element: " + str(self.elements[0]).replace("\n", "\\n"))
        sys.tracebacklimit = tracebacklimit
        # ``raise None`` would be a TypeError, so fall back to this exception
        # when nothing was wrapped.
        cause = self.__cause__ if self.__cause__ is not None else self
        raise cause from None
class OutputGenerator: class OutputGenerator:
_empty_lines: int _empty_lines: int
context: Union[Context, None] context: Union[Context, None]
@ -101,28 +139,36 @@ class OutputGenerator:
} }
def generate(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): def generate(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]):
if isinstance(e, Group): try:
old_context = self.context if isinstance(e, Group):
self.context = e.context old_context = self.context
if isinstance(e, list): self.context = e.context
self.generate_list(e) if isinstance(e, list):
elif isinstance(e, ListContainer): self.generate_list(e)
self.generate_ListContainer(e) elif isinstance(e, ListContainer):
elif isinstance(e, Inline): self.generate_ListContainer(e)
self.generate_Inline(e) elif isinstance(e, Inline):
elif isinstance(e, Block): self.generate_Inline(e)
self.generate_Block(e) elif isinstance(e, Block):
elif isinstance(e, MetaValue): self.generate_Block(e)
self.generate_MetaValue(e) elif isinstance(e, MetaValue):
elif isinstance(e, MetaList): self.generate_MetaValue(e)
self.generate_MetaList(e) elif isinstance(e, MetaList):
else: self.generate_MetaList(e)
try: else:
self.TYPE_DICT_MISC[type(e)](e) try:
except KeyError: method = self.TYPE_DICT_MISC[type(e)]
raise UnknownElementError(type(e)) except KeyError as err:
if isinstance(e, Group): raise UnknownElementError(type(e)) from err
self.context = old_context method(e)
if isinstance(e, Group):
self.context = old_context
except FormatitkoRecursiveError as err:
if not isinstance(e, ListContainer):
err.add_element(e)
raise err
except Exception as err:
raise FormatitkoRecursiveError(e, self.context) from err
def escape_special_chars(self, text: str) -> str: def escape_special_chars(self, text: str) -> str:
return text return text
@ -247,9 +293,10 @@ class OutputGenerator:
def generate_MetaValue(self, e: MetaValue):
    """Generate a metadata value with the handler registered for its type.

    When ``TYPE_DICT_META`` has no handler for ``type(e)``, the content of
    ``e`` is generated instead.
    """
    try:
        method = self.TYPE_DICT_META[type(e)]
    except KeyError:
        self.generate(e.content)
        # No handler exists; without this return the call below would fail
        # with UnboundLocalError because ``method`` was never assigned.
        return
    # Called outside the try block so that a KeyError raised inside the
    # handler is not mistaken for a missing handler.
    method(e)
def generate_MetaBlocks(self, e: MetaBlocks): def generate_MetaBlocks(self, e: MetaBlocks):
self.generate(e.content) self.generate(e.content)
@ -258,16 +305,23 @@ class OutputGenerator:
self.generate(e.content) self.generate(e.content)
def generate_MetaBool(self, e: MetaBool): def generate_MetaBool(self, e: MetaBool):
self.generate_simple_tag(e) if e.boolean:
self.write("True")
else:
self.write("False")
def generate_MetaMap(self, e: MetaMap): def generate_MetaMap(self, e: MetaMap):
self.generate_simple_tag(e) self.generate_simple_tag(e)
def generate_MetaString(self, e: MetaString): def generate_MetaString(self, e: MetaString):
self.generate_simple_tag(e) self.write(e.text)
def generate_Inline(self, e: Inline): def generate_Inline(self, e: Inline):
self.TYPE_DICT_INLINE[type(e)](e) try:
method = self.TYPE_DICT_INLINE[type(e)]
except KeyError as err:
raise UnknownElementError(type(e)) from err
method(e)
def generate_Str(self, e: Str): def generate_Str(self, e: Str):
self.write(self.escape_special_chars(e.text)) self.write(self.escape_special_chars(e.text))
@ -368,7 +422,11 @@ class OutputGenerator:
def generate_Block(self, e: Block): def generate_Block(self, e: Block):
self.TYPE_DICT_BLOCK[type(e)](e) try:
method = self.TYPE_DICT_BLOCK[type(e)]
except KeyError as err:
raise UnknownElementError(type(e)) from err
method(e)
# Block elements # Block elements
@ -433,12 +491,14 @@ class OutputGenerator:
self.generate_simple_tag(e) self.generate_simple_tag(e)
def generate_Doc(self, e: Doc): def generate_Doc(self, e: Doc):
if "header-includes" in e.metadata: # This is the pandoc way of doing things
self.generate(e.metadata["header-includes"])
if "header_content" in e.metadata: if "header_content" in e.metadata:
self.generate(e.metadata["header_content"]) self.generate(e.metadata["header_content"])
self.generate_simple_tag(e) self.generate_simple_tag(e)
if "footer_content" in e.metadata: if "footer_content" in e.metadata:
self.generate(e.metadata["footer_content"]) self.generate(e.metadata["footer_content"])
def generate_BlockGroup(self, e: BlockGroup): def generate_BlockGroup(self, e: BlockGroup):
self.generate_simple_tag(e) self.generate_simple_tag(e)

270
src/formatitko/tex.py

@ -1,270 +0,0 @@
from panflute import *
import os
from typing import Union
from .whitespace import NBSP
from .elements import FQuoted
from .util import inlinify
from .context import Group
from .images import ImageProcessor
# Heavily inspired by: git://git.ucw.cz/labsconf2022.git
def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
    """Render a panflute element, or a list of elements, as TeX source.

    The function dispatches on the element type.  Most element kinds return
    their own output directly.  The remaining ones (lists, list items, links,
    groups, headers and anything not mentioned explicitly, e.g. Strong or
    Emph) only choose a macro name and delimiters and are emitted through the
    generic macro template at the end of the function.

    Args:
        e: Element or ListContainer to render.
        i: Image processor used to convert images and to find them in its
            cache directory.
        indent_level: Nesting depth; only forwarded to recursive calls.
        indent_str: Indentation unit; only forwarded to recursive calls.

    Returns:
        The TeX source for ``e``.  An empty string is returned for elements
        whose ``only`` attribute names an output other than ``tex`` and for
        raw elements of a different format.

    Raises:
        KeyError: for an Image without a ``source_dir`` attribute, or for a
            list style, list delimiter or column alignment missing from the
            lookup tables below.
    """
    # `only` attribute which makes transformed elements appear only in tex
    # output or html output
    if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex":
        return ""

    if isinstance(e, ListContainer):
        return ''.join([tex(child, i, indent_level, indent_str) for child in e])

    # Bits from which the final element output is built at the end of this
    # function. Most elements override this by returning their own output.
    arguments = ""
    opening = "{"
    closing = "}"

    tag = e.tag.lower()
    if type(e) is Header:
        # Level 1 -> "hA", level 2 -> "hB", ...
        tag = "h" + chr(64 + e.level)

    # These are also disabled in pandoc so they shouldn't appear in the AST at all.
    not_implemented = (Citation, Cite, Definition, DefinitionItem, DefinitionList)
    if type(e) in not_implemented:
        return f'% FIXME: {type(e)}s not implemented \n'

    # Elements which can be represented by a simple string. The lookup is by
    # exact type on purpose, so that each class gets its own entry.
    simple_string = {
        NBSP: "~",
        Space: " ",
        Null: "",
        LineBreak: "\\\\",
        SoftBreak: " ",
        HorizontalRule: "\\hr\n\n"
    }
    if type(e) in simple_string:
        return simple_string[type(e)]

    # Simplest basic elements
    if isinstance(e, Str):
        # Non-breaking spaces inside the text become TeX ties, matching the
        # NBSP element above.
        # NOTE(review): the original literal was visually indistinguishable
        # from a plain space; it is written here as an explicit U+00A0 escape
        # -- confirm against the repository.
        return e.text.replace("\u00a0", "~")

    if isinstance(e, Para):
        return tex(e.content, i, 0, "")+"\n\n"

    if isinstance(e, Span) or isinstance(e, Plain):
        return tex(e.content, i, 0, "")

    # Overriding elements with their own returns
    if isinstance(e, Image):
        url = e.url

        # TODO: This should use OutputGenerator's get_image_processor_args
        # Attributes → image processor args
        additional_args = {}
        for key in ("width", "height", "quality", "dpi"):
            if "file-" + key in e.attributes:
                additional_args[key] = int(e.attributes["file-" + key])

        # The directory of the current file, will also look for images there.
        source_dir = e.attributes["source_dir"]

        _, ext = os.path.splitext(url)
        ext = ext[1:]

        # Conversions between various formats. Even supported formats have to
        # be 'converted' because the processing contains finding and moving
        # them to the cache directory. Anything unknown is converted to pdf.
        target_formats = {
            "pdf": "pdf",
            "png": "png",
            "jpeg": "jpeg",
            "svg": "pdf",
            "epdf": "pdf",
            "jpg": "jpeg"
        }
        url = i.process_image(url, target_formats.get(ext, "pdf"), source_dir, **additional_args)
        url = i.find_image(url, [i.cache_dir])

        width = ""
        if "width" in e.attributes:
            width = e.attributes["width"]
            # 50% → 0.5\hsize
            if width.endswith("%"):
                width = str(int(width[:-1])/100) + "\\hsize"
            width = "width " + width
        return f'\\image{{{width}}}{{{url}}}'

    if isinstance(e, FQuoted):
        # (opening, closing) quotation marks per language style:
        # cs: ‚…‘ and „…“, en: ‘…’ and “…”; any other style uses ASCII quotes.
        typographic_marks = {
            "cs": {"SingleQuote": ("\u201a", "\u2018"), "DoubleQuote": ("\u201e", "\u201c")},
            "en": {"SingleQuote": ("\u2018", "\u2019"), "DoubleQuote": ("\u201c", "\u201d")}
        }
        ascii_marks = {"SingleQuote": ("'", "'"), "DoubleQuote": ('"', '"')}
        marks = typographic_marks.get(e.style, ascii_marks)
        # An unexpected quote type is rendered as a double quote in every style.
        left, right = marks.get(e.quote_type, marks["DoubleQuote"])
        return f'{left}{tex(e.content, i, 0, "")}{right}'

    if isinstance(e, Code):
        return f'\\verb`{e.text.replace("`", "backtick")}`'

    if isinstance(e, Figure):
        return f'\\figure{{{tex(e.content, i, indent_level+1, indent_str)}}}{{{tex(e.caption, i, indent_level+1, indent_str)}}}\n\n'

    # Figure caption; when it cannot be inlinified it falls through to the
    # generic template at the end.
    if isinstance(e, Caption):
        if inlinify(e) is not None:
            return f'\\figcaption{{{tex(e.content, i, 0, "")}}}'

    if isinstance(e, Math):
        if e.format == "DisplayMath":
            return f'$${e.text}$$\n'
        else:
            return f'${e.text}$'

    # Footnote; when it cannot be inlinified it falls through to the generic
    # template with the `fn` tag.
    if isinstance(e, Note):
        tag = "fn"
        inlined = inlinify(e)
        if inlined is not None:
            return f'\\fn{{{tex(inlined, i, 0, "")}}}'

    if isinstance(e, Table):
        aligns = {
            "AlignLeft": "\\quad#\\quad\\hfil",
            "AlignRight": "\\quad\\hfil#\\quad",
            "AlignCenter": "\\quad\\hfil#\\hfil\\quad",
            "AlignDefault": "\\quad#\\quad\\hfil"
        }
        text = "\\strut"+"&".join([aligns[col[0]] for col in e.colspec])+"\\cr\n"
        text += tex(e.head.content, i, 0, "")
        text += "\\noalign{\\hrule}\n"
        # Only the first table body is rendered.
        text += tex(e.content[0].content, i, 0, "")
        text += "\\noalign{\\hrule}\n"
        text += tex(e.foot.content, i, 0, "")
        return "\\vskip1em\n\\halign{"+text+"}\n\\vskip1em\n"
        # FIXME: Implement rowspan

    if isinstance(e, TableRow):
        return "&".join([("\\multispan"+str(cell.colspan)+" " if cell.colspan > 1 else "")+tex(cell.content, i, 0, "") for cell in e.content])+"\\cr\n"

    if isinstance(e, RawInline):
        if e.format == "tex":
            return e.text
        else:
            return ""

    if isinstance(e, RawBlock):
        if e.format == "tex":
            return f'{e.text}\n'
        else:
            return ""

    # See https://pandoc.org/MANUAL.html#line-blocks
    if isinstance(e, LineBlock):
        return f'{tex(e.content, i, indent_level+1, indent_str)}\n'

    if isinstance(e, LineItem):
        return tex(e.content, i, 0, "") + ("\\\\\n" if e.next else "\n")

    # Exact type check: subclasses of Div are handled further below.
    if type(e) is Div:
        return f'{tex(e.content, i, indent_level+1, indent_str)}'

    if isinstance(e, Doc):
        return tex(e.content, i, indent_level, indent_str)+"\n\\bye" # Is having the \bye a bad idea here?

    # Non-overriding elements, they get generated using the template at the end
    # of this function
    if isinstance(e, BulletList):
        tag = "list"
        opening = ""
        arguments = "{o}"
        closing = "\\endlist"

    elif isinstance(e, OrderedList):
        tag = "list"
        opening = ""
        styles = {
            "DefaultStyle": "n",
            "Decimal": "n",
            "LowerRoman": "i",
            "UpperRoman": "I",
            "LowerAlpha": "a",
            "UpperAlpha": "A"
        }
        style = styles[e.style]
        delimiters = {
            "DefaultDelim": f"{style}.",
            "Period": f"{style}.",
            "OneParen": f"{style})",
            "TwoParens": f"({style})"
        }
        style = delimiters[e.delimiter]
        arguments = f"{{{style}}}"
        closing = "\\endlist"
        # FIXME: Starting number of list

    elif isinstance(e, ListItem):
        tag = ":"

    elif isinstance(e, Link):
        if len(e.content) == 1 and isinstance(e.content[0], Str) and e.content[0].text == e.url:
            tag = "url"
        else:
            tag = "linkurl"
            arguments = f'{{{e.url}}}'

    elif isinstance(e, Group):
        tag = "begingroup"
        opening = ""
        if "lang" in e.metadata and e.metadata["lang"] is not None:
            opening = "\\language"+e.metadata["lang"]
        closing = "\\endgroup"

    # The default which all non-overriding elements get generated by. This
    # includes elements, which were not explicitly mentioned in this function,
    # e. g. Strong, Emph...
    if isinstance(e, Inline):
        return f'\\{tag}{arguments}{opening}{tex(e.content, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{closing}'

    out_str = f"\\{tag}{arguments}{opening}\n"
    if hasattr(e, "_content"):
        out_str += tex(e.content, i, indent_level+1, indent_str)
    if hasattr(e, "text"):
        out_str += e.text
    out_str += f"\n{closing}\n\n"
    return out_str

22
src/formatitko/tex_generator.py

@ -8,7 +8,7 @@ from typing import Union
import os import os
from .output_generator import OutputGenerator from .output_generator import OutputGenerator
from .images import ImageProcessor from .images import ImageProcessor, ImageProcessorNamespaceSearcher
from .whitespace import NBSP from .whitespace import NBSP
from .elements import FQuoted from .elements import FQuoted
@ -106,9 +106,15 @@ class UCWTexGenerator(OutputGenerator):
url = e.url url = e.url
additional_args = self.get_image_processor_args(e.attributes) additional_args = self.get_image_processor_args(e.attributes)
additional_args["context"] = self.context
# The directory of the current file, will also look for images there. # The directory of the current file relative to the current working directory
source_dir = self.context.dir source_dir = self.context.dir
# The directory of the current file relative to the md file we were called on
rel_dir = self.context.rel_dir
searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
url = self.imageProcessor.get_path_without_namespace(url)
_, ext = os.path.splitext(url) _, ext = os.path.splitext(url)
ext = ext[1:] ext = ext[1:]
@ -118,17 +124,17 @@ class UCWTexGenerator(OutputGenerator):
# Even supported elements have to be 'converted' because the # Even supported elements have to be 'converted' because the
# processing contains finding and moving them to the cache # processing contains finding and moving them to the cache
# directory. # directory.
url = self.imageProcessor.process_image(url, ext, source_dir, **additional_args) url = self.imageProcessor.process_image(url, ext, searcher, **additional_args)
elif ext in ["svg"]: # FIXME elif ext in ["svg"]: # FIXME
url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args)
elif ext in ["epdf"]: elif ext in ["epdf"]:
url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args)
elif ext in ["jpg"]: elif ext in ["jpg"]:
url = self.imageProcessor.process_image(url, "jpeg", source_dir, **additional_args) url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args)
else: else:
url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args)
url = self.imageProcessor.find_image(url, [self.imageProcessor.cache_dir]) url = searcher.get_cache_searcher().find_image(url)
width = "" width = ""
if "width" in e.attributes: if "width" in e.attributes:
width = e.attributes["width"] width = e.attributes["width"]

176
src/formatitko/transform.py

@ -1,176 +0,0 @@
from panflute import Element, Div, Span, Quoted, Image, CodeBlock, Str, MetaInlines, MetaString, MetaBool, RawBlock
import re
import os
# Import local files
from .whitespace import Whitespace, NBSP, bavlna
from .util import nullify, import_md
from .context import Context, BlockGroup
from .command import Command, BlockCommand, InlineCommand
from .command_util import handle_command_define, parse_command
from .elements import FQuoted
import warnings
warnings.warn("The transform function has been deprecated, is left only for reference and will be removed in future commits. TransformProcessor should be used in its place.", DeprecationWarning)
# This is where tha magic happens. This function transforms a single element,
# to transform the entire tree, panflute's walk should be used.
def transform(e: Element, c: Context) -> Element:
    """Transform a single element within the context ``c`` (deprecated).

    This is where the magic happens. The function transforms one element; to
    transform an entire tree, panflute's walk should be used. The steps run in
    a fixed order and each one may replace ``e`` before the next one looks at
    it: non-breaking whitespace, ``if``/``ifn`` flags, command wrapping,
    partials and groups, quotes, image and code-block attributes, python
    ``run``/``define`` blocks, Span shorthands and finally command execution.

    Args:
        e: The element to transform.
        c: The context holding flags, metadata, commands and the trust level.

    Returns:
        The transformed element, which may be a different object than ``e``
        (for example the result of ``nullify`` for hidden elements).

    Raises:
        NameError: if a command is called which is not defined in ``c``.
        TypeError: if the ``[$key]{}`` shorthand names a metadatum which is
            not a MetaInlines, MetaString or MetaBool.
    """
    warnings.warn("The transform function has been deprecated, is left only for reference and will be removed in future commits. TransformProcessor should be used in its place.", DeprecationWarning)

    # Determine if this space should be non-breakable. See whitespace.py.
    if isinstance(e, Whitespace) and bavlna(e, c):
        e = NBSP()

    if hasattr(e, "attributes"):
        # `if` attribute. Only show this element if flag is set.
        if "if" in e.attributes:
            if not c.is_flag_set(e.attributes["if"]):
                return nullify(e)
        # `ifn` attribute. Only show this element if flag is NOT set
        if "ifn" in e.attributes:
            if c.is_flag_set(e.attributes["ifn"]):
                return nullify(e)

    # There are multiple ways to call a command so we turn it into a
    # unified element first and then call it at the end. This handles the
    # []{c=commandname} and
    # :::{c=commandname}
    # :::
    # syntax.
    if (isinstance(e, Div) or isinstance(e, Span)) and "c" in e.attributes:
        if isinstance(e, Div):
            e = BlockCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes)
        else:
            e = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes)

    # Isolated subdocuments using Group and a different Context. Can be
    # separate files (using attribute `partial`) or be inline using the
    # following syntax:
    # ```markdown {.group}
    # * file content *
    # ```
    # Both can contain their own metadata in a FrontMatter (YAML header)
    if (isinstance(e, Div) and "partial" in e.attributes)\
            or (isinstance(e, CodeBlock) and "markdown" in e.classes and "group" in e.classes):
        if isinstance(e, Div):
            path = c.dir + "/" + e.attributes["partial"]
            if not c.trusted: # If we're in an untrusted context, we shouldn't allow inclusion of files outside the PWD.
                full_path = os.path.abspath(path)
                pwd = os.path.abspath(".")
                if os.path.commonpath([full_path, pwd]) != os.path.commonpath([pwd]):
                    return nullify(e)
            # Close the file deterministically instead of leaking the handle.
            with open(path, "r") as partial_file:
                text = partial_file.read()
        else:
            text = e.text
            path = c.path
        if "type" in e.attributes and e.attributes["type"] in ["tex", "html"]:
            e = RawBlock(text, e.attributes["type"])
        else:
            includedDoc = import_md(text)
            # A partial is trusted unless it is marked `untrusted` or it is
            # included from a context which is itself untrusted.
            trusted = True
            if "untrusted" in e.attributes and e.attributes["untrusted"] in (True, 'True'):
                trusted = False
            if not c.trusted:
                trusted = False
            nContext = Context(includedDoc, path, c, trusted=trusted)
            language = includedDoc.get_metadata("lang")
            includedDoc = includedDoc.walk(transform, nContext)
            e = BlockGroup(*includedDoc.content, context=nContext, metadata={"lang": language})

    # Transform panflute's Quoted to custom FQuoted, see above.
    if isinstance(e, Quoted):
        quote_styles = {
            "cs": "cs",
            "en": "en",
            "sk": "cs",
            None: None
        }
        # A language without its own quote style gets style None instead of
        # aborting the whole transformation with a KeyError.
        e = FQuoted(*e.content, quote_type=e.quote_type, style=quote_styles.get(c.get_metadata("lang")))

    if isinstance(e, Image):
        # Pass down the directory of the current source file for finding image
        # files.
        e.attributes["source_dir"] = c.dir
        # Pass down "no-srcset" metadatum as attribute down to images.
        if "no-srcset" not in e.attributes:
            no_srcset = c.get_metadata("no-srcset")
            e.attributes["no-srcset"] = no_srcset if no_srcset is not None else False

    # Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks
    if isinstance(e, CodeBlock):
        if "highlight" not in e.attributes:
            highlight = c.get_metadata("highlight")
            e.attributes["highlight"] = highlight if highlight is not None else True
        if "style" not in e.attributes:
            highlight_style = c.get_metadata("highlight-style")
            e.attributes["style"] = highlight_style if highlight_style is not None else "default"
            e.attributes["noclasses"] = False
        # I think this is supposed to enable inline styles for highlighting when the style differs from the document, but it clearly doesn't work. a) HTML_generator never accesses it and b) Only the top-level document contains a style so you have to ask the top level context, not the current context.
        else:
            e.attributes["noclasses"] = True

    # Execute python code inside source code block. Works the same as commands.
    # Syntax:
    # ```python {.run}
    # print("woo")
    # ```
    if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and "run" in e.classes:
        if not c.trusted:
            return nullify(e)
        command_output = parse_command(e.text)(BlockCommand(), c)
        e = BlockCommand().replaceSelf(*([] if command_output is None else command_output))
        e = e.walk(transform, c)

    # Command defines for calling using BlockCommand and InlineCommand. If
    # redefine is used instead of define, the program doesn't check if the
    # command already exists.
    # Syntax:
    # ```python {define=commandname}
    # print(wooo)
    # ```
    if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and hasattr(e, "attributes")\
            and ("define" in e.attributes or "redefine" in e.attributes):
        if not c.trusted:
            return nullify(e)
        e = handle_command_define(e, c)

    ## Shorthands
    # Shorter (and sometimes the only) forms of certain features
    if isinstance(e, Span) and len(e.content) == 1 and isinstance(e.content[0], Str):
        shorthand = e.content[0].text
        ## Handle special command shorthand [!commandname]{}
        if re.match(r"^![\w.]+$", shorthand):
            e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": shorthand[1:]})

        ## Handle import [#path/file.md]{}
        # This is the exact opposite of partials. We take the commands, flags
        # and metadata but drop the content.
        # NOTE(review): unlike partials, this branch reads the file without
        # consulting c.trusted -- confirm that this is intended.
        elif re.match(r"^#.+$", shorthand):
            with open(c.dir + "/" + shorthand[1:], "r") as imported_file:
                importedDoc = import_md(imported_file.read())
            importedDoc.walk(transform, c)
            return nullify(e)

        ## Handle metadata print [$key1.key2]{}
        # This is a shorthand for just printing the content of some metadata.
        elif re.match(r"^\$[\w.]+$", shorthand):
            val = c.get_metadata(shorthand[1:], False)
            if isinstance(val, MetaInlines):
                e = Span(*val.content)
                e = e.walk(transform, c)
            elif isinstance(val, MetaString):
                # panflute's MetaString keeps its value in `.text`.
                e = Span(Str(val.text))
            elif isinstance(val, MetaBool):
                e = Span(Str(str(val.boolean)))
            else:
                raise TypeError(f"Cannot print value of metadatum '{shorthand[1:]}' of type '{type(val)}'")

    ## Execute commands
    # panflute's walk function transforms the children first, then the root
    # element, so the content the command receives is already transformed.
    # The output from the command is then transformed manually again.
    if isinstance(e, Command):
        command = c.get_command(e.attributes["c"])
        if not command:
            raise NameError(f"Command not defined '{e.attributes['c']}'.")
        command_output = command(e, c)
        # A command which returns nothing produces no content, the same way
        # the python `run` blocks above are handled.
        e = e.replaceSelf(*([] if command_output is None else command_output))
        e = e.walk(transform, c)

    return e

117
src/formatitko/transform_processor.py

@ -3,6 +3,7 @@ from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quote
from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead
from panflute import TableRow, TableCell, Caption, Doc from panflute import TableRow, TableCell, Caption, Doc
from panflute import MetaValue from panflute import MetaValue
from panflute.containers import attach
from typing import Union, Callable from typing import Union, Callable
from types import ModuleType from types import ModuleType
@ -18,17 +19,12 @@ from .context import Group, InlineGroup, BlockGroup
from .util import nullify, import_md from .util import nullify, import_md
from .context import Context, CommandCallable from .context import Context, CommandCallable
from .whitespace import Whitespace, bavlna from .whitespace import Whitespace, bavlna
from .command import BlockCommand, InlineCommand, CodeCommand, Command from .command import BlockCommand, InlineCommand, CodeCommand, Command, InlineError
from .command_util import handle_command_define, parse_command from .command_util import handle_command_define, parse_command
from .nop_processor import NOPProcessor, ELCl from .nop_processor import NOPProcessor, ELCl, DoubleDocError
class DoubleDocError(Exception):
"TransformProcessor should only ever see a single Doc."
pass
class TransformProcessor(NOPProcessor): class TransformProcessor(NOPProcessor):
context: Union[Context, None] = None
root_file_path: str root_file_path: str
root_highlight_style: str = "default" root_highlight_style: str = "default"
_command_modules: list[tuple[Union[dict[str, CommandCallable], ModuleType], str]] = [] _command_modules: list[tuple[Union[dict[str, CommandCallable], ModuleType], str]] = []
@ -44,6 +40,15 @@ class TransformProcessor(NOPProcessor):
def add_command_module(self, module: Union[dict[str, CommandCallable], ModuleType], module_name: str=""):
    """Register a module (or dict) of commands under ``module_name``.

    The pair is appended to ``self._command_modules``; ``init_context``
    later adds every registered module to the root context.
    """
    # NOTE(review): `_command_modules` has a class-level list default, so
    # unless it is rebound per instance elsewhere, registrations are shared
    # between instances -- confirm.
    self._command_modules.append((module, module_name))
def init_context(self, e: Doc) -> Context:
    """Create the root context for the document ``e`` and return it.

    Every registered command module is added to the new context and the
    document's content is wrapped in a single BlockGroup bound to it.

    Raises:
        DoubleDocError: if this processor already has a context.
    """
    already_initialised = self.context is not None
    if already_initialised:
        raise DoubleDocError()

    root_context = Context(e, self.root_file_path)
    self.context = root_context
    for registered in self._command_modules:
        module, module_name = registered
        root_context.add_commands_from_module(module, module_name)

    wrapper = BlockGroup(*e.content, context=root_context)
    e.content = [wrapper]
    return root_context
def get_pretransformers(self) -> list[Callable[[ELCl],ELCl]]: def get_pretransformers(self) -> list[Callable[[ELCl],ELCl]]:
return super().get_pretransformers()+[self.handle_if_attribute, self.handle_ifnot_attribute] return super().get_pretransformers()+[self.handle_if_attribute, self.handle_ifnot_attribute]
@ -61,15 +66,22 @@ class TransformProcessor(NOPProcessor):
return nullify(e) return nullify(e)
return e return e
def transform_ListContainer(self, e: ListContainer) -> ListContainer:
    """Transform a list of elements, explaining TypeErrors caused by commands.

    A TypeError raised by the parent implementation is re-raised as an
    InlineError when the list contains elements carrying a ``c`` (command)
    attribute; the message names them and shows the Div syntax. Without any
    such element the original TypeError is raised unchanged.
    """
    try:
        return super().transform_ListContainer(e)
    except TypeError as err:
        # Collect the names of all commands called directly in this list.
        names = [el.attributes["c"] for el in e if hasattr(el, "attributes") and "c" in el.attributes]
        if len(names) == 0:
            raise err
        single = len(names) == 1
        plural = '' if single else 's'
        shown = names[0] if single else names
        example = names[0] if single else '<command_name>'
        raise InlineError(f"The command{plural} {shown} was called in an Inline way but returned Block content. Put it in a paragraph alone or execute it as a Div using: \n::: {{c={example}}}\n:::")
def transform_Doc(self, e: Doc) -> Doc:
    """Initialise the root context for ``e`` and transform its content.

    ``init_context`` is called first, then the document content is replaced
    by its transformed version. The same document object is returned.
    """
    self.init_context(e)
    e.content = self.transform(e.content)
    return e
@ -87,19 +99,36 @@ class TransformProcessor(NOPProcessor):
e.content = self.transform(e.content) e.content = self.transform(e.content)
# OG now has Context so this is not needed per se, but I'm keeping this here for the handling of attribute > context > default value # OG now has Context so this is not needed per se, but I'm keeping this here for the handling of attribute > context > default value
# Pass down "no-srcset" metadatum as attribute down to images. # Pass down "no-srcset" metadatum as attribute down to images.
if not "no-srcset" in e.attributes: if "no-srcset" not in e.attributes:
e.attributes["no-srcset"] = self.context.get_metadata("no-srcset") if self.context.get_metadata("no-srcset") is not None else False e.attributes["no-srcset"] = self.context.get_metadata("no-srcset") if self.context.get_metadata("no-srcset") is not None else False
if "no-img-link" not in e.attributes:
e.attributes["no-img-link"] = self.context.get_metadata("no-img-link") if self.context.get_metadata("no-img-link") is not None else False
return e return e
def create_Group(self, *content, new_context: Context, replaced:Element, inline: bool=False) -> Group:
    """Build a Group around ``content`` and transform it in ``new_context``.

    The group is attached at the position of ``replaced`` (same parent,
    location and index) before it is transformed, and ``self.context`` is
    switched to ``new_context`` for the duration of the transformation.

    Args:
        content: The elements the group will contain.
        new_context: The context used while transforming the group.
        replaced: The element whose place in the tree the group takes.
        inline: Create an InlineGroup instead of a BlockGroup.

    Returns:
        The transformed group.
    """
    old_context = self.context
    self.context = new_context
    try:
        if inline:
            g = InlineGroup(*content, context=new_context)
        else:
            g = BlockGroup(*content, context=new_context)
        attach(g, replaced.parent, replaced.location, replaced.index)
        g = self.transform(g)
    finally:
        # Restore the outer context even when the transformation fails, so
        # the processor is never left inside the nested context.
        self.context = old_context
    return g
def transform_Para(self, e: Para) -> Union[Para, Div]:
    """Transform a paragraph, letting a lone Span replace it with a Div.

    If the paragraph consists of a single Span, the span is transformed
    first. If the span turns out to be a command, it might return a Div;
    the paragraph is then replaced by that Div. Otherwise the transformed
    span is stored back and the paragraph goes through the parent
    implementation as usual.
    """
    holds_lone_span = len(e.content) == 1 and isinstance(e.content[0], Span)
    if holds_lone_span:
        transformed = self.transform(e.content[0])
        if isinstance(transformed, Div):
            return transformed
        e.content[0] = transformed
    return super().transform_Para(e)
def transform_Div(self, e: Div) -> Union[Div, Group, Null, RawBlock]: def transform_Div(self, e: Div) -> Union[Div, Group, Null, RawBlock]:
e.content = self.transform(e.content) e.content = self.transform(e.content)
@ -110,14 +139,15 @@ class TransformProcessor(NOPProcessor):
new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted) new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted)
for attribute, value in e.attributes.items(): for attribute, value in e.attributes.items():
new_context.set_metadata(attribute, value) new_context.set_metadata(attribute, value)
return self.create_Group(*e.content, new_context=new_context) return self.create_Group(*e.content, replaced=e, new_context=new_context)
if "c" in e.attributes: if "c" in e.attributes:
# Commands can be called multiple ways, this handles the following syntax: # Commands can be called multiple ways, this handles the following syntax:
# :::{c=commandname} # :::{c=commandname}
# ::: # :::
e = BlockCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) command = BlockCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes)
return self.transform(e) attach(command, e.parent, e.location, e.index)
return self.transform(command)
if "partial" in e.attributes: if "partial" in e.attributes:
# `partial` attribute # `partial` attribute
@ -129,7 +159,9 @@ class TransformProcessor(NOPProcessor):
pwd = os.path.abspath(".") pwd = os.path.abspath(".")
if os.path.commonpath([full_path, pwd]) != os.path.commonpath([pwd]): if os.path.commonpath([full_path, pwd]) != os.path.commonpath([pwd]):
return nullify(e) return nullify(e)
text = open(self.context.dir + "/" + e.attributes["partial"], "r").read() filename = self.context.dir + "/" + e.attributes["partial"]
self.context.add_dep(filename)
text = open(filename, "r").read()
path = self.context.dir + "/" + e.attributes["partial"] path = self.context.dir + "/" + e.attributes["partial"]
if e.attributes["type"] == "md": if e.attributes["type"] == "md":
includedDoc = import_md(text) includedDoc = import_md(text)
@ -138,7 +170,7 @@ class TransformProcessor(NOPProcessor):
trusted = False trusted = False
if not self.context.trusted: if not self.context.trusted:
trusted = False trusted = False
return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, path, self.context, trusted=trusted)) return self.create_Group(*includedDoc.content, replaced=e, new_context=Context(includedDoc, path, self.context, trusted=trusted))
elif e.attributes["type"] in ["tex", "html"]: elif e.attributes["type"] in ["tex", "html"]:
return RawBlock(text, e.attributes["type"]) return RawBlock(text, e.attributes["type"])
@ -170,19 +202,21 @@ class TransformProcessor(NOPProcessor):
new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted) new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted)
for attribute, value in e.attributes.items(): for attribute, value in e.attributes.items():
new_context.set_metadata(attribute, value) new_context.set_metadata(attribute, value)
return self.create_Group(*e.content, new_context=new_context, inline=True) return self.create_Group(*e.content, replaced=e, new_context=new_context, inline=True)
if "c" in e.attributes: if "c" in e.attributes:
# Commands can be called multiple ways, this handles the following syntax: # Commands can be called multiple ways, this handles the following syntax:
# []{c=commandname} and # []{c=commandname} and
e = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) command = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes)
return self.transform(e) attach(command, e.parent, e.location, e.index)
return self.transform(command)
if len(e.content) == 1 and isinstance(e.content[0], Str): if len(e.content) == 1 and isinstance(e.content[0], Str):
## Handle special command shorthand [!commandname]{} ## Handle special command shorthand [!commandname]{}
if re.match(r"^![\w.]+$", e.content[0].text): if re.match(r"^![\w.]+$", e.content[0].text):
e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]}) command = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]})
return self.transform(e) attach(command, e.parent, e.location, e.index)
return self.transform(command)
## Handle import [#ksp_formatitko as ksp]{}, [#ksp_formatitko]{type=module} or [#path/file.md]{type=md} ## Handle import [#ksp_formatitko as ksp]{}, [#ksp_formatitko]{type=module} or [#path/file.md]{type=md}
# Import a python module as commands (type=module, the default) or # Import a python module as commands (type=module, the default) or
@ -191,7 +225,9 @@ class TransformProcessor(NOPProcessor):
if not "type" in e.attributes: if not "type" in e.attributes:
e.attributes["type"] = "module" e.attributes["type"] = "module"
if e.attributes["type"] == "md": if e.attributes["type"] == "md":
importedDoc = import_md(open(self.context.dir + "/" + e.content[0].text[1:], "r").read()) filename = self.context.dir + "/" + e.content[0].text[1:]
self.context.add_dep(filename)
importedDoc = import_md(open(filename, "r").read())
self.transform(importedDoc.content) self.transform(importedDoc.content)
elif e.attributes["type"] == "module": elif e.attributes["type"] == "module":
matches = re.match(r"^(\w+)(?: as (\w+))?$", e.content[0].text[1:]) matches = re.match(r"^(\w+)(?: as (\w+))?$", e.content[0].text[1:])
@ -201,7 +237,9 @@ class TransformProcessor(NOPProcessor):
module_name = matches.group(1) if matches.group(2) is None else matches.group(2) module_name = matches.group(1) if matches.group(2) is None else matches.group(2)
self.context.add_commands_from_module(module, module_name) self.context.add_commands_from_module(module, module_name)
elif e.attributes["type"] == "metadata": elif e.attributes["type"] == "metadata":
data = json.load(open(self.context.dir + "/" + e.content[0].text[1:], "r")) filename = self.context.dir + "/" + e.content[0].text[1:]
self.context.add_dep(filename)
data = json.load(open(filename, "r"))
key = "" if not "key" in e.attributes else e.attributes["key"] key = "" if not "key" in e.attributes else e.attributes["key"]
self.context.import_metadata(data, key) self.context.import_metadata(data, key)
else: else:
@ -229,14 +267,15 @@ class TransformProcessor(NOPProcessor):
def transform_CodeBlock(self, e: CodeBlock) -> Union[CodeBlock, Div, Null]: def transform_CodeBlock(self, e: CodeBlock) -> Union[CodeBlock, Div, Null]:
if "markdown" in e.classes and "group" in e.classes: if "markdown" in e.classes and "group" in e.classes:
includedDoc = import_md(e.text) includedDoc = import_md(e.text)
return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, self.context.path, self.context, self.context.trusted)) return self.create_Group(*includedDoc.content, replaced=e, new_context=Context(includedDoc, self.context.path, self.context, self.context.trusted))
if "python" in e.classes and "run" in e.classes: if "python" in e.classes and "run" in e.classes:
if not self.context.trusted: if not self.context.trusted:
return nullify(e) return nullify(e)
command_output = parse_command(e.text)(BlockCommand(), self.context) command_output = parse_command(e.text)(BlockCommand(), self.context, self)
e = BlockCommand().replaceSelf(*([] if command_output is None else command_output)) command = BlockCommand().replaceSelf(*([] if command_output is None else command_output))
return self.transform(e) attach(command, e.parent, e.location, e.index)
return self.transform(command)
if "python" in e.classes and ("define" in e.attributes or "redefine" in e.attributes): if "python" in e.classes and ("define" in e.attributes or "redefine" in e.attributes):
if not self.context.trusted: if not self.context.trusted:
@ -244,7 +283,9 @@ class TransformProcessor(NOPProcessor):
return handle_command_define(e, self.context) return handle_command_define(e, self.context)
if "c" in e.attributes: if "c" in e.attributes:
return self.transform(CodeCommand(e.text, identifier=e.identifier, classes=e.classes, attributes=e.attributes)) command = CodeCommand(e.text, identifier=e.identifier, classes=e.classes, attributes=e.attributes)
attach(command, e.parent, e.location, e.index)
return self.transform(command)
# Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks # Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks
# OG now has Context so this is not needed per se, but I'm keeping this here for the handling of attribute > context > default value # OG now has Context so this is not needed per se, but I'm keeping this here for the handling of attribute > context > default value
@ -257,9 +298,9 @@ class TransformProcessor(NOPProcessor):
def transform_Command(self, e: Command) -> Union[Div, Span]:
    """Execute the command named by ``e.attributes["c"]`` and return its replacement.

    The command callable is looked up in ``self.context`` and invoked with
    the element, the current context and this processor. Whatever it
    returns is spliced into the element through ``e.replaceSelf``.

    Raises:
        NameError: if the context has no command registered under that name.
    """
    name = e.attributes["c"]
    # Look the command up once and reuse it for both the existence check and the call.
    command = self.context.get_command(name)
    if not command:
        raise NameError(f"Command not defined '{name}'.")
    # The processor is handed to the command as a third argument, so the
    # command can call back into it (e.g. ``processor.transform``).
    command_output = command(e, self.context, self)
    # A command that returns None is treated as producing no content.
    # The replacement is returned as-is; this method does not run
    # ``self.transform`` over it again.
    return e.replaceSelf(*([] if command_output is None else command_output))
def transform_Whitespace(self, e: Whitespace) -> Whitespace: def transform_Whitespace(self, e: Whitespace) -> Whitespace:
if bavlna(e, self.context): if bavlna(e, self.context):

2
src/formatitko/util.py

@ -37,7 +37,7 @@ def parse_string(s: str) -> list[Union[Str, Space]]:
# we ever want to disable or enable some of panflute's markdown extensions, # we ever want to disable or enable some of panflute's markdown extensions,
# this is the place to do it. # this is the place to do it.
def import_md(s: str, standalone: bool=True) -> Union[Doc, list[Element]]:
    """Parse the Markdown source ``s`` via ``convert_text``.

    The input format is pandoc ``markdown`` with the ``definition_lists``
    and ``latex_macros`` extensions switched off. ``--strip-comments`` is
    passed as an extra argument so HTML comments in the source do not reach
    the resulting tree.

    ``standalone`` is forwarded unchanged to ``convert_text``. Going by the
    annotations, ``True`` presumably yields a whole ``Doc`` and ``False`` a
    list of elements -- TODO confirm against ``convert_text``.
    """
    return convert_text(
        s,
        standalone=standalone,
        input_format="markdown-definition_lists-latex_macros",
        extra_args=["--strip-comments"],
    )
def import_md_list(s: str) -> list[Element]:
    """Parse the Markdown source ``s`` with ``import_md`` in non-standalone mode."""
    return import_md(s, standalone=False)

10
test/test-files/test-partial.md

@ -56,7 +56,15 @@ $$
$$ $$
```comment <!--There is an inline *emphasis with $math \error$*.-->
<!--
```python {.run}
print("bruh")
raise Exception("Jsem piča")
```
-->
![This is a figure, go figure...](logo.svg){width=25%}What ![This is a figure, go figure...](logo.svg){width=25%}What
![This is a figure, go figure...](logo.pdf){width=50%} ![This is a figure, go figure...](logo.pdf){width=50%}
![Fakt epesní reproduktor](reproduktor.jpeg){width=10em} ![Fakt epesní reproduktor](reproduktor.jpeg){width=10em}

8
test/test-top.html

@ -1,8 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta charset='utf-8'>
<link rel='stylesheet' href='https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css' integrity='sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0' crossorigin='anonymous'>
</head>
<body>

26
test/test.md

@ -3,12 +3,13 @@ title: 'Wooooo a title'
subtitle: 'A subtitle' subtitle: 'A subtitle'
are_we_there_yet: False are_we_there_yet: False
lang: "en" lang: "en"
header-includes: |
<style>
body {
color: forestgreen;
}
</style>
--- ---
:::: {.header_content}
::: {partial="test-top.html" type="html"}
:::
::::
[#test-files/test-import.md]{type=md} [#test-files/test-import.md]{type=md}
[#test.json]{type=metadata key=orgs} [#test.json]{type=metadata key=orgs}
@ -198,12 +199,23 @@ ii. wym bro
```python {define=bash} ```python {define=bash}
import subprocess import subprocess
c = subprocess.run(["bash", "-c", element.text], stdout=subprocess.PIPE, check=True, encoding="utf-8") c = subprocess.run(["bash", "-c", element.text], stdout=subprocess.PIPE, check=True, encoding="utf-8")
return [pf.Para(pf.Str(c.stdout))] return [pf.CodeBlock(c.stdout)]
``` ```
```bash {c=bash} ```bash {c=bash}
cat /etc/hostname cat /etc/os-release
```
::: {.group lang=cs}
```python {.run}
return processor.transform([
*parse_string("V "),
pf.Link(pf.Str("odevzdávátku"), url="https://ksp.mff.cuni.cz/z/odevzdavatko/"),
*parse_string(" si necháte vygenerovat vstupy a odevzdáte příslušné výstupy. Záleží jen na vás, jak výstupy vyrobíte.")
])
``` ```
:::
```html ```html
<div> <div>

Loading…
Cancel
Save