From a7963ba824e684c4c1dbbca098f070a7a295637d Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 6 Jan 2024 19:32:54 +0100 Subject: [PATCH 01/22] WIP: Experimental error handling with snippets of input for OutputGenerator. Would be nice to generalise for TransformProcessor, which is not easy as they don't have a common parent class. --- src/formatitko/html_generator.py | 16 ++-- src/formatitko/katex.py | 2 +- src/formatitko/latex_generator.py | 10 +-- src/formatitko/output_generator.py | 118 +++++++++++++++++++---------- src/formatitko/tex_generator.py | 56 +++++++------- 5 files changed, 120 insertions(+), 82 deletions(-) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index d7dbc03..59db46d 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -29,10 +29,10 @@ class HTMLGenerator(OutputGenerator): self.imageProcessor = imageProcessor super().__init__(output_file, *args, **kwargs) - def generate(self, e: Union[Element, ListContainer]): + def _generate(self, e: Union[Element, ListContainer]): if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html": return - super().generate(e) + super()._generate(e) def escape_special_chars(self, text: str) -> str: text = text.replace("&", "&") @@ -190,7 +190,7 @@ class HTMLGenerator(OutputGenerator): attributes["alt"] = e.title else: fake_out = io.StringIO() - HTMLGenerator(fake_out, self.katexClient, self.imageProcessor).generate(e.content) + HTMLGenerator(fake_out, self.katexClient, self.imageProcessor)._generate(e.content) attributes["alt"] = fake_out.getvalue() if len(srcset) != 0: @@ -202,7 +202,7 @@ class HTMLGenerator(OutputGenerator): img = RawInline(self.single_tag("img", attributes)) link = Link(img, url=url) - self.generate(link) + self._generate(link) def generate_InlineGroup(self, e: InlineGroup): self.generate_Group(e) @@ -216,10 +216,10 @@ class HTMLGenerator(OutputGenerator): self.katexClient.endgroup() def 
generate_Plain(self, e: Plain): - self.generate(e.content) + self._generate(e.content) def generate_LineItem(self, e: LineItem): - self.generate(e.content) + self._generate(e.content) self.write("
") self.endln() @@ -229,12 +229,12 @@ class HTMLGenerator(OutputGenerator): tag = self.tagname(e) if inline is not None: self.write(self.start_tag(tag)+" (") - self.generate(inline) + self._generate(inline) self.write(") "+self.end_tag(tag)) else: self.writeln(self.start_tag(tag) + "(") self.indent_more() - self.generate(e.content) + self._generate(e.content) self.indent_less() self.writeln(self.end_tag(tag) + ")") diff --git a/src/formatitko/katex.py b/src/formatitko/katex.py index ad431e6..39e521f 100644 --- a/src/formatitko/katex.py +++ b/src/formatitko/katex.py @@ -79,7 +79,7 @@ class KatexClient: if "error" in response: raise KatexServerError(response["error"]) if "error" in response["results"][0]: - raise KatexError(response["results"][0]["error"]) + raise KatexError(response["results"][0]["error"] + " in $" + tex + "$") else: return response["results"][0]["html"] diff --git a/src/formatitko/latex_generator.py b/src/formatitko/latex_generator.py index 124cf11..31e0325 100644 --- a/src/formatitko/latex_generator.py +++ b/src/formatitko/latex_generator.py @@ -17,10 +17,10 @@ class LaTeXGenerator(OutputGenerator): self.imageProcessor = imageProcessor super().__init__(output_file, *args, **kwargs) - def generate(self, e: Union[Element, ListContainer]): + def _generate(self, e: Union[Element, ListContainer]): if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex": return - super().generate(e) + super()._generate(e) def escape_special_chars(self, text: str) -> str: text = text.replace("&", "\\&") @@ -58,14 +58,14 @@ class LaTeXGenerator(OutputGenerator): self.endln() def generate_Para(self, e: Para): - self.generate(e.content) + self._generate(e.content) self.writeln("") # This ensures an empty line def generate_Plain(self, e: Plain): - self.generate(e.content) + self._generate(e.content) def generate_Span(self, e: Plain): - self.generate(e.content) + self._generate(e.content) def generate_Header(self, e: Header): tag = { diff 
--git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 61df1c6..69db2cf 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -3,18 +3,31 @@ from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quote from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead from panflute import TableRow, TableCell, Caption, Doc from panflute import MetaValue +from panflute import stringify from typing import Union, Callable from .whitespace import NBSP from .elements import FQuoted from .context import Group, InlineGroup, BlockGroup, Context -import re +import re, sys class UnknownElementError(Exception): "An unknown Element has been passed to the OutputGenerator, probably because panflute introduced a new one." 
pass + +class OutputGeneratorError(Exception): + "A generic exception which wraps other exceptions and adds element-based traceback" + elements: list[Union[Element, ListContainer, list[Union[Element, ListContainer]]]] + + def __init__(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]], *args): + self.elements = [e] + super().__init__(args) + + def add_element(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): + self.elements.append(e) + class OutputGenerator: _empty_lines: int context: Union[Context, None] @@ -101,28 +114,53 @@ class OutputGenerator: } def generate(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): - if isinstance(e, Group): - old_context = self.context - self.context = e.context - if isinstance(e, list): - self.generate_list(e) - elif isinstance(e, ListContainer): - self.generate_ListContainer(e) - elif isinstance(e, Inline): - self.generate_Inline(e) - elif isinstance(e, Block): - self.generate_Block(e) - elif isinstance(e, MetaValue): - self.generate_MetaValue(e) - elif isinstance(e, MetaList): - self.generate_MetaList(e) - else: - try: - self.TYPE_DICT_MISC[type(e)](e) - except KeyError: - raise UnknownElementError(type(e)) - if isinstance(e, Group): - self.context = old_context + try: + self._generate(e) + except OutputGeneratorError as err: + def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + eprint("Error occured in ", end="") + for i in range(len(err.elements)-1, 0, -1): + if hasattr(err.elements[i], "content") and isinstance(err.elements[i].content[0], Inline): + eprint() + eprint('on line: "' + stringify(err.elements[i]) + '"', end="") + break + eprint(type(err.elements[i]).__name__ + "[" + str(err.elements[i-1].index) + "]", end=": ") + eprint() + eprint("in element: " + str(err.elements[0])) + sys.tracebacklimit = 0 + raise err.__cause__ from None + + def _generate(self, e: Union[Element, ListContainer, list[Union[Element, 
ListContainer]]]): + try: + if isinstance(e, Group): + old_context = self.context + self.context = e.context + if isinstance(e, list): + self.generate_list(e) + elif isinstance(e, ListContainer): + self.generate_ListContainer(e) + elif isinstance(e, Inline): + self.generate_Inline(e) + elif isinstance(e, Block): + self.generate_Block(e) + elif isinstance(e, MetaValue): + self.generate_MetaValue(e) + elif isinstance(e, MetaList): + self.generate_MetaList(e) + else: + try: + self.TYPE_DICT_MISC[type(e)](e) + except KeyError as err: + raise UnknownElementError(type(e)) from err + if isinstance(e, Group): + self.context = old_context + except OutputGeneratorError as err: + if not isinstance(e, ListContainer): + err.add_element(e) + raise err + except Exception as err: + raise OutputGeneratorError(e) from err def escape_special_chars(self, text: str) -> str: return text @@ -210,13 +248,13 @@ class OutputGenerator: def generate_simple_inline_tag(self, tag: str, content: Union[ListContainer, Element, list[Union[Element, ListContainer]]], attributes: dict[str,str]={}): self.write(self.start_tag(tag, attributes)) - self.generate(content) + self._generate(content) self.write(self.end_tag(tag)) def generate_simple_block_tag(self, tag: str, content: Union[ListContainer, Element, list[Union[Element, ListContainer]]], attributes: dict[str,str]={}): self.writeln(self.start_tag(tag, attributes)) self.indent_more() - self.generate(content) + self._generate(content) self.indent_less() self.writeln(self.end_tag(tag)) @@ -235,27 +273,27 @@ class OutputGenerator: def generate_ListContainer(self, e: ListContainer): for child in e: - self.generate(child) + self._generate(child) def generate_list(self, e: list): for el in e: - self.generate(el) + self._generate(el) def generate_MetaList(self, e: MetaList): for child in e: - self.generate(child) + self._generate(child) def generate_MetaValue(self, e: MetaValue): try: self.TYPE_DICT_META[type(e)](e) except KeyError: - 
self.generate(e.content) + self._generate(e.content) def generate_MetaBlocks(self, e: MetaBlocks): - self.generate(e.content) + self._generate(e.content) def generate_MetaInlines(self, e: MetaInlines): - self.generate(e.content) + self._generate(e.content) def generate_MetaBool(self, e: MetaBool): self.generate_simple_tag(e) @@ -285,33 +323,33 @@ class OutputGenerator: if e.style == "cs": if e.quote_type == "SingleQuote": self.write("‚") - self.generate(e.content) + self._generate(e.content) self.write("‘") elif e.quote_type == "DoubleQuote": self.write("„") - self.generate(e.content) + self._generate(e.content) self.write("“") elif e.style == "en": if e.quote_type == "SingleQuote": self.write("‘") - self.generate(e.content) + self._generate(e.content) self.write("’") elif e.quote_type == "DoubleQuote": self.write("“") - self.generate(e.content) + self._generate(e.content) self.write("”") else: if e.quote_type == "SingleQuote": self.write("'") - self.generate(e.content) + self._generate(e.content) self.write("'") elif e.quote_type == "DoubleQuote": self.write("\"") - self.generate(e.content) + self._generate(e.content) self.write("\"") else: self.write("\"") - self.generate(e.content) + self._generate(e.content) self.write("\"") @@ -434,10 +472,10 @@ class OutputGenerator: def generate_Doc(self, e: Doc): if "header_content" in e.metadata: - self.generate(e.metadata["header_content"]) + self._generate(e.metadata["header_content"]) self.generate_simple_tag(e) if "footer_content" in e.metadata: - self.generate(e.metadata["footer_content"]) + self._generate(e.metadata["footer_content"]) def generate_BlockGroup(self, e: BlockGroup): self.generate_simple_tag(e) diff --git a/src/formatitko/tex_generator.py b/src/formatitko/tex_generator.py index c97b90d..a1f2268 100644 --- a/src/formatitko/tex_generator.py +++ b/src/formatitko/tex_generator.py @@ -41,10 +41,10 @@ class UCWTexGenerator(OutputGenerator): text = text.replace("​", "") return text - def generate(self, e: 
Union[Element, ListContainer]): + def _generate(self, e: Union[Element, ListContainer]): if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex": return - super().generate(e) + super()._generate(e) def writepar(self, text: str): self.ensure_empty(2) @@ -60,7 +60,7 @@ class UCWTexGenerator(OutputGenerator): def generate_Para(self, e: Para): self.ensure_empty(2) - self.generate(e.content) + self._generate(e.content) self.ensure_empty(2) def generate_HorizontalRule(self, e: HorizontalRule): @@ -72,7 +72,7 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"\ucwmodule{verb}") self.writeln(r"\ucwmodule{link}") self.writeln(r"\input formatitko.tex") - self.generate(e.content) + self._generate(e.content) self.writeln(r"\bye") def get_language_macro(self, lang: str): @@ -88,21 +88,21 @@ class UCWTexGenerator(OutputGenerator): def generate_InlineGroup(self, e: InlineGroup): self.write(r"{") self.write(self.get_language_macro(self.context.get_metadata("lang"))) - self.generate(e.content) + self._generate(e.content) self.write(r"}") def generate_BlockGroup(self, e: BlockGroup): self.writeln(r"\begingroup") self.indent_more() self.writeln(self.get_language_macro(self.context.get_metadata("lang"))) - self.generate(e.content) + self._generate(e.content) self.indent_less() self.writeln(r"\endgroup") def generate_Header(self, e: Header): self.ensure_empty(2) self.write("\\"+"sub"*(e.level-1)+"section{") - self.generate(e.content) + self._generate(e.content) self.write(r"}") self.ensure_empty(2) @@ -156,12 +156,12 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"\vskip5pt") self.writeln(r"\centerline{") self.indent_more() - self.generate(e.content) + self._generate(e.content) self.indent_less() self.writeln(r"}") self.writeln(r"\centerline{") self.indent_more() - self.generate(e.caption) + self._generate(e.caption) self.indent_less() self.writeln(r"}") self.writeln(r"\vskip5pt{}") @@ -173,7 +173,7 @@ class 
UCWTexGenerator(OutputGenerator): else: self.write(r"{\I{}") self._italic+=1 - self.generate(e.content) + self._generate(e.content) self._italic-=1 self.write(r"}") @@ -183,7 +183,7 @@ class UCWTexGenerator(OutputGenerator): else: self.write(r"{\bf{}") self._bold+=1 - self.generate(e.content) + self._generate(e.content) self._bold-=1 self.write(r"}") @@ -204,7 +204,7 @@ class UCWTexGenerator(OutputGenerator): def generate_Note(self, e: Note): self.write(r"\fn{") - self.generate(inlinify(e)) + self._generate(inlinify(e)) self.write(r"}") def generate_Table(self, e: Table): @@ -217,11 +217,11 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"\vskip1em") self.writeln(r"\halign{\strut"+"&".join([aligns[col[0]] for col in e.colspec])+r"\cr") self.indent_more() - self.generate(e.head.content) + self._generate(e.head.content) self.writeln(r"\noalign{\hrule}") - self.generate(e.content[0].content) + self._generate(e.content[0].content) self.writeln(r"\noalign{\hrule}") - self.generate(e.foot.content) + self._generate(e.foot.content) self.indent_less() self.writeln("}") self.writeln(r"\vskip1em") @@ -230,7 +230,7 @@ class UCWTexGenerator(OutputGenerator): for cell in e.content: if cell.colspan > 1: self.write(r"\multispan"+str(cell.colspan)+"{} ") - self.generate(cell.content) + self._generate(cell.content) if cell.next: self.write(" & ") self.write(r"\cr") @@ -245,10 +245,10 @@ class UCWTexGenerator(OutputGenerator): self.writeraw(e.text) def generate_Plain(self, e: Plain): - self.generate(e.content) + self._generate(e.content) def generate_Span(self, e: Span): - self.generate(e.content) + self._generate(e.content) def generate_CodeBlock(self, e: CodeBlock): self.writeln(r"\verbatim{") @@ -256,15 +256,15 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"}") def generate_Div(self, e: Div): - self.generate(e.content) + self._generate(e.content) def generate_LineBlock(self, e: LineBlock): self.writeln() - self.generate(e.content) + self._generate(e.content) 
self.writeln() def generate_LineItem(self, e: LineItem): - self.generate(e.content) + self._generate(e.content) if e.next: self.write(r"\\") self.endln() @@ -273,7 +273,7 @@ class UCWTexGenerator(OutputGenerator): self.ensure_empty(2) self.writeln(r"\list{o}") self.indent_more() - self.generate(e.content) + self._generate(e.content) self.indent_less() self.write(r"\endlist") self.ensure_empty(2) @@ -298,7 +298,7 @@ class UCWTexGenerator(OutputGenerator): style = delimiters[e.delimiter] self.writeln(r"\list{"+style+r"}") self.indent_more() - self.generate(e.content) + self._generate(e.content) self.indent_less() self.writeln(r"\endlist") self.ensure_empty(2) @@ -306,13 +306,13 @@ class UCWTexGenerator(OutputGenerator): def generate_ListItem(self, e: ListItem): self.endln() self.write(r"\:") - self.generate(e.content) + self._generate(e.content) self.endln() def generate_BlockQuote(self, e: BlockQuote): self.writeln(r"\blockquote{") self.indent_more() - self.generate(e.content) + self._generate(e.content) self.indent_less() self.writeln(r"}") @@ -321,17 +321,17 @@ class UCWTexGenerator(OutputGenerator): self.write(r"\url{") else: self.write(r"\linkurl{"+e.url+r"}{") - self.generate(e.content) + self._generate(e.content) self.write(r"}") # } def generate_Subscript(self, e: Subscript): self.write(r"\subscript{") - self.generate(e.content) + self._generate(e.content) self.write(r"}") def generate_Superscript(self, e: Superscript): self.write(r"\superscript{") - self.generate(e.content) + self._generate(e.content) self.write(r"}") def generate_simple_tag(self, e: Union[Element, None] = None, tag: str = "", attributes: Union[dict[str, str], None] = None, content: Union[ListContainer, Element, list[Union[Element, ListContainer]], str, None] = None, inline: Union[bool, None] = None): From 42a63b3163a1bb406aab3c6d7a85446844cb3fea Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Thu, 15 Feb 2024 17:43:10 +0100 Subject: [PATCH 02/22] Partial rewrite of error handling Now the 
error doesn't handle itself, but offers a helper function to do it. --- src/formatitko/formatitko.py | 33 ++++++++++--- src/formatitko/html_generator.py | 16 +++---- src/formatitko/latex_generator.py | 10 ++-- src/formatitko/nop_processor.py | 40 +++++++++------- src/formatitko/output_generator.py | 77 +++++++++++++++--------------- src/formatitko/tex_generator.py | 56 +++++++++++----------- 6 files changed, 129 insertions(+), 103 deletions(-) diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index 9b3b942..9ee76ee 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -14,7 +14,7 @@ from .katex import KatexClient from .html import html from .tex import tex from .images import ImageProcessor -from .output_generator import OutputGenerator +from .output_generator import OutputGenerator, FormatitkoRecursiveError from .html_generator import HTMLGenerator from .transform_processor import TransformProcessor from .pandoc_processor import PandocProcessor @@ -54,9 +54,15 @@ def main(): doc = import_md(open(args.input_filename, "r").read()) if args.debug: - OutputGenerator(sys.stdout).generate(doc) + try: + OutputGenerator(sys.stdout).generate(doc) + except FormatitkoRecursiveError as e: + e.pretty_print() - doc = TransformProcessor(args.input_filename).transform(doc) + try: + doc = TransformProcessor(args.input_filename).transform(doc) + except FormatitkoRecursiveError as e: + e.pretty_print() # Initialize the image processor (this just keeps some basic state) imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs) @@ -65,11 +71,18 @@ def main(): # Initialize KaTeX client (this runs the node app and connects to a unix socket) with KatexClient(socket=args.katex_socket) as katexClient: with open(args.output_html, "w") as file: - HTMLGenerator(file, katexClient, imageProcessor).generate(doc) + try: + HTMLGenerator(file, katexClient, imageProcessor).generate(doc) + except 
FormatitkoRecursiveError as e: + e.pretty_print() if args.output_tex is not None: with open(args.output_tex, "w") as file: - UCWTexGenerator(file, imageProcessor).generate(doc) + try: + UCWTexGenerator(file, imageProcessor).generate(doc) + except FormatitkoRecursiveError as e: + e.pretty_print() + if args.output_md is not None: with open(args.output_md, "w") as file: @@ -83,7 +96,10 @@ def main(): if args.output_tex is None: fd = tempfile.NamedTemporaryFile(dir=".", suffix=".tex") with open(fd.name, "w") as file: - UCWTexGenerator(file, imageProcessor).generate(doc) + try: + UCWTexGenerator(file, imageProcessor).generate(doc) + except FormatitkoRecursiveError as e: + e.pretty_print() filename = fd.name else: filename = args.output_tex @@ -93,7 +109,10 @@ def main(): if args.debug: print("-----------------------------------") - OutputGenerator(sys.stdout).generate(doc) + try: + OutputGenerator(sys.stdout).generate(doc) + except FormatitkoRecursiveError as e: + e.pretty_print() if __name__ == "__main__": diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index 59db46d..d7dbc03 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -29,10 +29,10 @@ class HTMLGenerator(OutputGenerator): self.imageProcessor = imageProcessor super().__init__(output_file, *args, **kwargs) - def _generate(self, e: Union[Element, ListContainer]): + def generate(self, e: Union[Element, ListContainer]): if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html": return - super()._generate(e) + super().generate(e) def escape_special_chars(self, text: str) -> str: text = text.replace("&", "&") @@ -190,7 +190,7 @@ class HTMLGenerator(OutputGenerator): attributes["alt"] = e.title else: fake_out = io.StringIO() - HTMLGenerator(fake_out, self.katexClient, self.imageProcessor)._generate(e.content) + HTMLGenerator(fake_out, self.katexClient, self.imageProcessor).generate(e.content) attributes["alt"] = 
fake_out.getvalue() if len(srcset) != 0: @@ -202,7 +202,7 @@ class HTMLGenerator(OutputGenerator): img = RawInline(self.single_tag("img", attributes)) link = Link(img, url=url) - self._generate(link) + self.generate(link) def generate_InlineGroup(self, e: InlineGroup): self.generate_Group(e) @@ -216,10 +216,10 @@ class HTMLGenerator(OutputGenerator): self.katexClient.endgroup() def generate_Plain(self, e: Plain): - self._generate(e.content) + self.generate(e.content) def generate_LineItem(self, e: LineItem): - self._generate(e.content) + self.generate(e.content) self.write("
") self.endln() @@ -229,12 +229,12 @@ class HTMLGenerator(OutputGenerator): tag = self.tagname(e) if inline is not None: self.write(self.start_tag(tag)+" (") - self._generate(inline) + self.generate(inline) self.write(") "+self.end_tag(tag)) else: self.writeln(self.start_tag(tag) + "(") self.indent_more() - self._generate(e.content) + self.generate(e.content) self.indent_less() self.writeln(self.end_tag(tag) + ")") diff --git a/src/formatitko/latex_generator.py b/src/formatitko/latex_generator.py index 31e0325..124cf11 100644 --- a/src/formatitko/latex_generator.py +++ b/src/formatitko/latex_generator.py @@ -17,10 +17,10 @@ class LaTeXGenerator(OutputGenerator): self.imageProcessor = imageProcessor super().__init__(output_file, *args, **kwargs) - def _generate(self, e: Union[Element, ListContainer]): + def generate(self, e: Union[Element, ListContainer]): if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex": return - super()._generate(e) + super().generate(e) def escape_special_chars(self, text: str) -> str: text = text.replace("&", "\\&") @@ -58,14 +58,14 @@ class LaTeXGenerator(OutputGenerator): self.endln() def generate_Para(self, e: Para): - self._generate(e.content) + self.generate(e.content) self.writeln("") # This ensures an empty line def generate_Plain(self, e: Plain): - self._generate(e.content) + self.generate(e.content) def generate_Span(self, e: Plain): - self._generate(e.content) + self.generate(e.content) def generate_Header(self, e: Header): tag = { diff --git a/src/formatitko/nop_processor.py b/src/formatitko/nop_processor.py index 2f09ffb..5a89228 100644 --- a/src/formatitko/nop_processor.py +++ b/src/formatitko/nop_processor.py @@ -10,6 +10,7 @@ from .elements import FQuoted from .context import Group, InlineGroup, BlockGroup from .whitespace import Whitespace from .command import BlockCommand, InlineCommand, CodeCommand, Command +from .output_generator import FormatitkoRecursiveError ELCl = Union[Element, 
ListContainer, list[Union[Element, ListContainer]]] @@ -96,23 +97,30 @@ class NOPProcessor: return [] def transform(self, e: ELCl) -> ELCl: - if isinstance(e, list): - return self.transform_list(e) - elif isinstance(e, ListContainer): - return self.transform_ListContainer(e) - - for transformer in self.get_pretransformers(): - e = transformer(e) - try: - e = self.TYPE_DICT[type(e)](e) - except KeyError: - raise self.UnknownElementError(type(e)) - - for transformer in self.get_posttransformers(): - e = transformer(e) - - return e + if isinstance(e, list): + return self.transform_list(e) + elif isinstance(e, ListContainer): + return self.transform_ListContainer(e) + + for transformer in self.get_pretransformers(): + e = transformer(e) + + try: + e = self.TYPE_DICT[type(e)](e) + except KeyError: + raise self.UnknownElementError(type(e)) + + for transformer in self.get_posttransformers(): + e = transformer(e) + + return e + except FormatitkoRecursiveError as err: + if not isinstance(e, ListContainer): + err.add_element(e) + raise err + except Exception as err: + raise FormatitkoRecursiveError(e) from err def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]: for i in range(len(e)): diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 69db2cf..9869b87 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -10,14 +10,15 @@ from .whitespace import NBSP from .elements import FQuoted from .context import Group, InlineGroup, BlockGroup, Context -import re, sys + +import sys class UnknownElementError(Exception): "An unknown Element has been passed to the OutputGenerator, probably because panflute introduced a new one." 
pass -class OutputGeneratorError(Exception): +class FormatitkoRecursiveError(Exception): "A generic exception which wraps other exceptions and adds element-based traceback" elements: list[Union[Element, ListContainer, list[Union[Element, ListContainer]]]] @@ -28,6 +29,22 @@ class OutputGeneratorError(Exception): def add_element(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): self.elements.append(e) + def pretty_print(self): + def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + eprint("Error occured in ", end="") + for i in range(len(self.elements)-1, 0, -1): + if hasattr(self.elements[i], "content") and isinstance(self.elements[i].content[0], Inline): + eprint() + eprint('on line: "' + stringify(self.elements[i]) + '"', end="") + break + eprint(type(self.elements[i]).__name__ + "[" + str(self.elements[i-1].index) + "]", end=": ") + eprint() + eprint("in element: " + str(self.elements[0])) + sys.tracebacklimit = 0 + raise self.__cause__ from None + + class OutputGenerator: _empty_lines: int context: Union[Context, None] @@ -114,24 +131,6 @@ class OutputGenerator: } def generate(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): - try: - self._generate(e) - except OutputGeneratorError as err: - def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - eprint("Error occured in ", end="") - for i in range(len(err.elements)-1, 0, -1): - if hasattr(err.elements[i], "content") and isinstance(err.elements[i].content[0], Inline): - eprint() - eprint('on line: "' + stringify(err.elements[i]) + '"', end="") - break - eprint(type(err.elements[i]).__name__ + "[" + str(err.elements[i-1].index) + "]", end=": ") - eprint() - eprint("in element: " + str(err.elements[0])) - sys.tracebacklimit = 0 - raise err.__cause__ from None - - def _generate(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): try: if isinstance(e, Group): old_context = self.context @@ -155,12 
+154,12 @@ class OutputGenerator: raise UnknownElementError(type(e)) from err if isinstance(e, Group): self.context = old_context - except OutputGeneratorError as err: + except FormatitkoRecursiveError as err: if not isinstance(e, ListContainer): err.add_element(e) raise err except Exception as err: - raise OutputGeneratorError(e) from err + raise FormatitkoRecursiveError(e) from err def escape_special_chars(self, text: str) -> str: return text @@ -248,13 +247,13 @@ class OutputGenerator: def generate_simple_inline_tag(self, tag: str, content: Union[ListContainer, Element, list[Union[Element, ListContainer]]], attributes: dict[str,str]={}): self.write(self.start_tag(tag, attributes)) - self._generate(content) + self.generate(content) self.write(self.end_tag(tag)) def generate_simple_block_tag(self, tag: str, content: Union[ListContainer, Element, list[Union[Element, ListContainer]]], attributes: dict[str,str]={}): self.writeln(self.start_tag(tag, attributes)) self.indent_more() - self._generate(content) + self.generate(content) self.indent_less() self.writeln(self.end_tag(tag)) @@ -273,27 +272,27 @@ class OutputGenerator: def generate_ListContainer(self, e: ListContainer): for child in e: - self._generate(child) + self.generate(child) def generate_list(self, e: list): for el in e: - self._generate(el) + self.generate(el) def generate_MetaList(self, e: MetaList): for child in e: - self._generate(child) + self.generate(child) def generate_MetaValue(self, e: MetaValue): try: self.TYPE_DICT_META[type(e)](e) except KeyError: - self._generate(e.content) + self.generate(e.content) def generate_MetaBlocks(self, e: MetaBlocks): - self._generate(e.content) + self.generate(e.content) def generate_MetaInlines(self, e: MetaInlines): - self._generate(e.content) + self.generate(e.content) def generate_MetaBool(self, e: MetaBool): self.generate_simple_tag(e) @@ -323,33 +322,33 @@ class OutputGenerator: if e.style == "cs": if e.quote_type == "SingleQuote": self.write("‚") - 
self._generate(e.content) + self.generate(e.content) self.write("‘") elif e.quote_type == "DoubleQuote": self.write("„") - self._generate(e.content) + self.generate(e.content) self.write("“") elif e.style == "en": if e.quote_type == "SingleQuote": self.write("‘") - self._generate(e.content) + self.generate(e.content) self.write("’") elif e.quote_type == "DoubleQuote": self.write("“") - self._generate(e.content) + self.generate(e.content) self.write("”") else: if e.quote_type == "SingleQuote": self.write("'") - self._generate(e.content) + self.generate(e.content) self.write("'") elif e.quote_type == "DoubleQuote": self.write("\"") - self._generate(e.content) + self.generate(e.content) self.write("\"") else: self.write("\"") - self._generate(e.content) + self.generate(e.content) self.write("\"") @@ -472,10 +471,10 @@ class OutputGenerator: def generate_Doc(self, e: Doc): if "header_content" in e.metadata: - self._generate(e.metadata["header_content"]) + self.generate(e.metadata["header_content"]) self.generate_simple_tag(e) if "footer_content" in e.metadata: - self._generate(e.metadata["footer_content"]) + self.generate(e.metadata["footer_content"]) def generate_BlockGroup(self, e: BlockGroup): self.generate_simple_tag(e) diff --git a/src/formatitko/tex_generator.py b/src/formatitko/tex_generator.py index a1f2268..c97b90d 100644 --- a/src/formatitko/tex_generator.py +++ b/src/formatitko/tex_generator.py @@ -41,10 +41,10 @@ class UCWTexGenerator(OutputGenerator): text = text.replace("​", "") return text - def _generate(self, e: Union[Element, ListContainer]): + def generate(self, e: Union[Element, ListContainer]): if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex": return - super()._generate(e) + super().generate(e) def writepar(self, text: str): self.ensure_empty(2) @@ -60,7 +60,7 @@ class UCWTexGenerator(OutputGenerator): def generate_Para(self, e: Para): self.ensure_empty(2) - self._generate(e.content) + 
self.generate(e.content) self.ensure_empty(2) def generate_HorizontalRule(self, e: HorizontalRule): @@ -72,7 +72,7 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"\ucwmodule{verb}") self.writeln(r"\ucwmodule{link}") self.writeln(r"\input formatitko.tex") - self._generate(e.content) + self.generate(e.content) self.writeln(r"\bye") def get_language_macro(self, lang: str): @@ -88,21 +88,21 @@ class UCWTexGenerator(OutputGenerator): def generate_InlineGroup(self, e: InlineGroup): self.write(r"{") self.write(self.get_language_macro(self.context.get_metadata("lang"))) - self._generate(e.content) + self.generate(e.content) self.write(r"}") def generate_BlockGroup(self, e: BlockGroup): self.writeln(r"\begingroup") self.indent_more() self.writeln(self.get_language_macro(self.context.get_metadata("lang"))) - self._generate(e.content) + self.generate(e.content) self.indent_less() self.writeln(r"\endgroup") def generate_Header(self, e: Header): self.ensure_empty(2) self.write("\\"+"sub"*(e.level-1)+"section{") - self._generate(e.content) + self.generate(e.content) self.write(r"}") self.ensure_empty(2) @@ -156,12 +156,12 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"\vskip5pt") self.writeln(r"\centerline{") self.indent_more() - self._generate(e.content) + self.generate(e.content) self.indent_less() self.writeln(r"}") self.writeln(r"\centerline{") self.indent_more() - self._generate(e.caption) + self.generate(e.caption) self.indent_less() self.writeln(r"}") self.writeln(r"\vskip5pt{}") @@ -173,7 +173,7 @@ class UCWTexGenerator(OutputGenerator): else: self.write(r"{\I{}") self._italic+=1 - self._generate(e.content) + self.generate(e.content) self._italic-=1 self.write(r"}") @@ -183,7 +183,7 @@ class UCWTexGenerator(OutputGenerator): else: self.write(r"{\bf{}") self._bold+=1 - self._generate(e.content) + self.generate(e.content) self._bold-=1 self.write(r"}") @@ -204,7 +204,7 @@ class UCWTexGenerator(OutputGenerator): def generate_Note(self, e: Note): 
self.write(r"\fn{") - self._generate(inlinify(e)) + self.generate(inlinify(e)) self.write(r"}") def generate_Table(self, e: Table): @@ -217,11 +217,11 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"\vskip1em") self.writeln(r"\halign{\strut"+"&".join([aligns[col[0]] for col in e.colspec])+r"\cr") self.indent_more() - self._generate(e.head.content) + self.generate(e.head.content) self.writeln(r"\noalign{\hrule}") - self._generate(e.content[0].content) + self.generate(e.content[0].content) self.writeln(r"\noalign{\hrule}") - self._generate(e.foot.content) + self.generate(e.foot.content) self.indent_less() self.writeln("}") self.writeln(r"\vskip1em") @@ -230,7 +230,7 @@ class UCWTexGenerator(OutputGenerator): for cell in e.content: if cell.colspan > 1: self.write(r"\multispan"+str(cell.colspan)+"{} ") - self._generate(cell.content) + self.generate(cell.content) if cell.next: self.write(" & ") self.write(r"\cr") @@ -245,10 +245,10 @@ class UCWTexGenerator(OutputGenerator): self.writeraw(e.text) def generate_Plain(self, e: Plain): - self._generate(e.content) + self.generate(e.content) def generate_Span(self, e: Span): - self._generate(e.content) + self.generate(e.content) def generate_CodeBlock(self, e: CodeBlock): self.writeln(r"\verbatim{") @@ -256,15 +256,15 @@ class UCWTexGenerator(OutputGenerator): self.writeln(r"}") def generate_Div(self, e: Div): - self._generate(e.content) + self.generate(e.content) def generate_LineBlock(self, e: LineBlock): self.writeln() - self._generate(e.content) + self.generate(e.content) self.writeln() def generate_LineItem(self, e: LineItem): - self._generate(e.content) + self.generate(e.content) if e.next: self.write(r"\\") self.endln() @@ -273,7 +273,7 @@ class UCWTexGenerator(OutputGenerator): self.ensure_empty(2) self.writeln(r"\list{o}") self.indent_more() - self._generate(e.content) + self.generate(e.content) self.indent_less() self.write(r"\endlist") self.ensure_empty(2) @@ -298,7 +298,7 @@ class 
UCWTexGenerator(OutputGenerator): style = delimiters[e.delimiter] self.writeln(r"\list{"+style+r"}") self.indent_more() - self._generate(e.content) + self.generate(e.content) self.indent_less() self.writeln(r"\endlist") self.ensure_empty(2) @@ -306,13 +306,13 @@ class UCWTexGenerator(OutputGenerator): def generate_ListItem(self, e: ListItem): self.endln() self.write(r"\:") - self._generate(e.content) + self.generate(e.content) self.endln() def generate_BlockQuote(self, e: BlockQuote): self.writeln(r"\blockquote{") self.indent_more() - self._generate(e.content) + self.generate(e.content) self.indent_less() self.writeln(r"}") @@ -321,17 +321,17 @@ class UCWTexGenerator(OutputGenerator): self.write(r"\url{") else: self.write(r"\linkurl{"+e.url+r"}{") - self._generate(e.content) + self.generate(e.content) self.write(r"}") # } def generate_Subscript(self, e: Subscript): self.write(r"\subscript{") - self._generate(e.content) + self.generate(e.content) self.write(r"}") def generate_Superscript(self, e: Superscript): self.write(r"\superscript{") - self._generate(e.content) + self.generate(e.content) self.write(r"}") def generate_simple_tag(self, e: Union[Element, None] = None, tag: str = "", attributes: Union[dict[str, str], None] = None, content: Union[ListContainer, Element, list[Union[Element, ListContainer]], str, None] = None, inline: Union[bool, None] = None): From 6de4ea2743ca86f044b585c5c4ddf68704102868 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Thu, 15 Feb 2024 18:19:10 +0100 Subject: [PATCH 03/22] Error handling now contains filename. 
--- src/formatitko/nop_processor.py | 11 +++++++++-- src/formatitko/output_generator.py | 8 +++++--- src/formatitko/transform_processor.py | 7 +------ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/formatitko/nop_processor.py b/src/formatitko/nop_processor.py index 5a89228..4cffa90 100644 --- a/src/formatitko/nop_processor.py +++ b/src/formatitko/nop_processor.py @@ -7,16 +7,20 @@ from typing import Union, Callable from .whitespace import NBSP from .elements import FQuoted -from .context import Group, InlineGroup, BlockGroup +from .context import Group, InlineGroup, BlockGroup, Context from .whitespace import Whitespace from .command import BlockCommand, InlineCommand, CodeCommand, Command from .output_generator import FormatitkoRecursiveError ELCl = Union[Element, ListContainer, list[Union[Element, ListContainer]]] +class DoubleDocError(Exception): + "TransformProcessor should only ever see a single Doc." + pass class NOPProcessor: TYPE_DICT: dict[type, Callable] + context: Union[Context, None] = None class UnknownElementError(Exception): f"An unknown Element has been passed to the NOPProcessor, probably because panflute introduced a new one." 
@@ -120,7 +124,7 @@ class NOPProcessor: err.add_element(e) raise err except Exception as err: - raise FormatitkoRecursiveError(e) from err + raise FormatitkoRecursiveError(e, self.context.filename) from err def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]: for i in range(len(e)): @@ -301,6 +305,9 @@ class NOPProcessor: return e def transform_Doc(self, e: Doc) -> Doc: + if self.context is not None: + raise DoubleDocError() + self.context = Context(e, self.root_file_path) e.content = self.transform(e.content) return e diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 9869b87..46431f7 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -21,9 +21,11 @@ class UnknownElementError(Exception): class FormatitkoRecursiveError(Exception): "A generic exception which wraps other exceptions and adds element-based traceback" elements: list[Union[Element, ListContainer, list[Union[Element, ListContainer]]]] + file: str - def __init__(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]], *args): + def __init__(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]], file: str, *args): self.elements = [e] + self.file = file super().__init__(args) def add_element(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): @@ -32,7 +34,7 @@ class FormatitkoRecursiveError(Exception): def pretty_print(self): def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) - eprint("Error occured in ", end="") + eprint(f"Error occured in file {self.file} in ", end="") for i in range(len(self.elements)-1, 0, -1): if hasattr(self.elements[i], "content") and isinstance(self.elements[i].content[0], Inline): eprint() @@ -159,7 +161,7 @@ class OutputGenerator: err.add_element(e) raise err except Exception as err: - raise FormatitkoRecursiveError(e) from err + raise FormatitkoRecursiveError(e, 
self.context.filename) from err def escape_special_chars(self, text: str) -> str: return text diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index 32a0306..60cba5a 100644 --- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -20,15 +20,10 @@ from .context import Context, CommandCallable from .whitespace import Whitespace, bavlna from .command import BlockCommand, InlineCommand, CodeCommand, Command from .command_util import handle_command_define, parse_command -from .nop_processor import NOPProcessor, ELCl - -class DoubleDocError(Exception): - "TransformProcessor should only ever see a single Doc." - pass +from .nop_processor import NOPProcessor, ELCl, DoubleDocError class TransformProcessor(NOPProcessor): - context: Union[Context, None] = None root_file_path: str root_highlight_style: str = "default" _command_modules: list[tuple[Union[dict[str, CommandCallable], ModuleType], str]] = [] From 50b29b1ae35ad2f9ebc71b5b0bb618869236c593 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 17 Feb 2024 18:07:47 +0100 Subject: [PATCH 04/22] Improved error messages --- src/formatitko/nop_processor.py | 2 +- src/formatitko/output_generator.py | 28 +++++++++++++++++----------- test/test-files/test-partial.md | 9 +++++++++ 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/formatitko/nop_processor.py b/src/formatitko/nop_processor.py index 4cffa90..bc20a2a 100644 --- a/src/formatitko/nop_processor.py +++ b/src/formatitko/nop_processor.py @@ -124,7 +124,7 @@ class NOPProcessor: err.add_element(e) raise err except Exception as err: - raise FormatitkoRecursiveError(e, self.context.filename) from err + raise FormatitkoRecursiveError(e, self.context) from err def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]: for i in range(len(e)): diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 
46431f7..4f3cd24 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -21,11 +21,11 @@ class UnknownElementError(Exception): class FormatitkoRecursiveError(Exception): "A generic exception which wraps other exceptions and adds element-based traceback" elements: list[Union[Element, ListContainer, list[Union[Element, ListContainer]]]] - file: str + context: Context - def __init__(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]], file: str, *args): + def __init__(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]], context: Context, *args): self.elements = [e] - self.file = file + self.context = context super().__init__(args) def add_element(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): @@ -34,15 +34,21 @@ class FormatitkoRecursiveError(Exception): def pretty_print(self): def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) - eprint(f"Error occured in file {self.file} in ", end="") + + def print_filename_recursive(context: Context): + return context.filename +\ + ((" (included from " + print_filename_recursive(context.parent) + ")") if context.parent else "") + eprint(f"Error occured in file {print_filename_recursive(self.context)} in ", end="") + line = None for i in range(len(self.elements)-1, 0, -1): - if hasattr(self.elements[i], "content") and isinstance(self.elements[i].content[0], Inline): - eprint() - eprint('on line: "' + stringify(self.elements[i]) + '"', end="") - break - eprint(type(self.elements[i]).__name__ + "[" + str(self.elements[i-1].index) + "]", end=": ") + if hasattr(self.elements[i], "content") and len(self.elements[i].content) > 0 and isinstance(self.elements[i].content[0], Inline) and line is None: + line = self.elements[i] + eprint(type(self.elements[i]).__name__ + "[" + (str(self.elements[i-1].index) if isinstance(self.elements[i-1].index, int) else "") + "]", end=": ") + if line: + eprint() + eprint('on 
line: "' + stringify(line).strip() + '"', end="") eprint() - eprint("in element: " + str(self.elements[0])) + eprint("in element: " + str(self.elements[0]).replace("\n", "\\n")) sys.tracebacklimit = 0 raise self.__cause__ from None @@ -161,7 +167,7 @@ class OutputGenerator: err.add_element(e) raise err except Exception as err: - raise FormatitkoRecursiveError(e, self.context.filename) from err + raise FormatitkoRecursiveError(e, self.context) from err def escape_special_chars(self, text: str) -> str: return text diff --git a/test/test-files/test-partial.md b/test/test-files/test-partial.md index f1c9ab5..ee2fd6b 100644 --- a/test/test-files/test-partial.md +++ b/test/test-files/test-partial.md @@ -56,6 +56,15 @@ $$ $$ + + + + ![This is a figure, go figure...](logo.svg){width=25%}What ![This is a figure, go figure...](logo.pdf){width=50%} From 723038a2bdf1f6ffd5c46671664246267e040a4c Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 17 Feb 2024 18:24:08 +0100 Subject: [PATCH 05/22] Deprecated old versions of transform, html and tex generation. 
--- src/formatitko/html.py | 311 ------------------------------------ src/formatitko/tex.py | 270 ------------------------------- src/formatitko/transform.py | 176 -------------------- 3 files changed, 757 deletions(-) delete mode 100644 src/formatitko/html.py delete mode 100644 src/formatitko/tex.py delete mode 100644 src/formatitko/transform.py diff --git a/src/formatitko/html.py b/src/formatitko/html.py deleted file mode 100644 index 0403082..0000000 --- a/src/formatitko/html.py +++ /dev/null @@ -1,311 +0,0 @@ -from panflute import * -from pygments import highlight -from pygments.lexers import get_lexer_by_name -from pygments.formatters import HtmlFormatter -from pygments.util import ClassNotFound -import os -from typing import Union - -from .whitespace import NBSP -from .elements import FQuoted -from .katex import KatexClient -from .util import inlinify -from .context import Group -from .images import ImageProcessor - -import warnings -warnings.warn("The html function has been deprecated, is left only for reference and will be removed in future commits. HTML_generator should be used in its place.", DeprecationWarning) - -def html(e: Union[Element, ListContainer], k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str: - - warnings.warn("The html function has been deprecated, is left only for reference and will be removed in future commits. HTML_generator should be used in its place.", DeprecationWarning) - - # `only` attribute which makes transformed elements appear only in tex - # output or html output - if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html": - return "" - - if isinstance(e, ListContainer): - return ''.join([html(child, k, i, indent_level, indent_str) for child in e]) - - # Bits from which the final element output is built at the end of this - # function. Most elements override this by returning their own output. 
- tag = e.tag.lower() - attributes = "" - content_foot = "" - content_head = "" - - if isinstance(e, Str): - return e.text.replace(" ", " ") - - # Most elements fit the general template at the end of the function, just - # need their html tag specified. - tags = { - BulletList: "ul", - Doc: "main", - Emph: "em", - Caption: "figcaption", - Para: "p", - Header: "h"+str(e.level) if isinstance(e, Header) else "", - LineBlock: "p", - ListItem: "li", - SmallCaps: "span", - Strikeout: "strike", - Subscript: "sub", - Superscript: "sup", - Underline: "u", - TableBody: "tbody", - TableHead: "thead", - TableFoot: "tfoot", - TableRow: "tr", - TableCell: "td", - } - if type(e) in tags: - tag = tags[type(e)] - - # These are also disabled in pandoc so they shouldn't appear in the AST at all. - not_implemented = { - Citation: True, - Cite: True, - Definition: True, - DefinitionItem: True, - DefinitionList: True - } - if type(e) in not_implemented: - return f'' - - # Elements which can be represented by a simple string - simple_string = { - NBSP: " ", - Space: " ", - Null: "", - LineBreak: f"\n{indent_level*indent_str}
\n{indent_level*indent_str}", - SoftBreak: f" ", - HorizontalRule: f"{indent_level*indent_str}
\n" - } - if type(e) in simple_string: - return simple_string[type(e)] - - if hasattr(e, "identifier") and e.identifier != "": - attributes += f' id="{e.identifier}"' - - if hasattr(e, "classes") and len(e.classes) != 0: - attributes += f' class="{" ".join(e.classes)}"' - - # Attributes are only passed down manually, because we use them internally. - # Maybe this should be a blocklist instead of an allowlist? - - # Overriding elements with their own returns - if isinstance(e, CodeBlock): - if len(e.classes) > 0 and (e.attributes["highlight"] == True or e.attributes["highlight"] == 'True'): - # Syntax highlighting using pygments - for cl in e.classes: - try: - lexer = get_lexer_by_name(cl) - except ClassNotFound: - continue - break - else: - print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}") - formatter = HtmlFormatter(style=e.attributes["style"]) - result = highlight(e.text, lexer, formatter) - return f'{result}' - else: - return f'
{e.text}
' - - if isinstance(e, Doc): - formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default") - content_head = f'' - - if isinstance(e, Image): - url = e.url - - # Attributes → image processor args - additional_args = {} - if "file-width" in e.attributes: - additional_args["width"] = int(e.attributes["file-width"]) - if "file-height" in e.attributes: - additional_args["height"] = int(e.attributes["file-height"]) - if "file-quality" in e.attributes: - additional_args["quality"] = int(e.attributes["file-quality"]) - if "file-dpi" in e.attributes: - additional_args["dpi"] = int(e.attributes["file-dpi"]) - - # The directory of the current file, will also look for images there. - source_dir = e.attributes["source_dir"] - - _, ext = os.path.splitext(url) - ext = ext[1:] - - # Conversions between various formats. - if ext in ["svg", "png", "jpeg", "gif"]: - # Even supported elements have to be 'converted' because the - # processing contains finding and moving them to the output - # directory. - url = i.process_image(url, ext, source_dir, **additional_args) - elif ext in ["pdf", "epdf"]: - if not "dpi" in additional_args: - additional_args["dpi"] = 300 - url = i.process_image(url, "png", source_dir, **additional_args) - elif ext in ["jpg"]: - url = i.process_image(url, "jpeg", source_dir, **additional_args) - else: - url = i.process_image(url, "png", source_dir, **additional_args) - - # Srcset generation - multiple alternative sizes of images browsers can - # choose from. 
- _, ext = os.path.splitext(url) - ext = ext[1:] - srcset = [] - if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'): - # This is inspired by @vojta001's blogPhoto shortcode he made for - # patek.cz: - # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html - width, height = i.get_image_size(url, [i.public_dir]) - sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality) - for size in sizes: - if width <= size[0] and height <= size[1]: - srcset.append((f'{i.web_path}/{url}', f'{width}w')) - break - quality = size[2] if ext == "jpeg" else None - srcset.append((f'{i.web_path}/{i.process_image(url, ext, i.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w')) - - url = i.web_path + "/" + url - - attributes = f'{" style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}"' - if len(srcset) != 0: - return f'' - else: - return f'' - - # See https://pandoc.org/MANUAL.html#line-blocks - if isinstance(e, LineItem): - return indent_level*indent_str + html(e.content, k, i) + "
\n" - - # Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before me)) - if isinstance(e, Note): - content_head = "(" - content_foot = ")" - if inlinify(e) is not None: - return f' ({html(inlinify(e), k, i, 0, "")})' - - if isinstance(e, FQuoted): - if e.style == "cs": - if e.quote_type == "SingleQuote": - return f'‚{html(e.content, k, i, 0, "")}‘' - elif e.quote_type == "DoubleQuote": - return f'„{html(e.content, k, i, 0, "")}“' - elif e.style == "en": - if e.quote_type == "SingleQuote": - return f'‘{html(e.content, k, i, 0, "")}’' - elif e.quote_type == "DoubleQuote": - return f'“{html(e.content, k, i, 0, "")}”' - else: - if e.quote_type == "SingleQuote": - return f'\'{html(e.content, k, i, 0, "")}\'' - elif e.quote_type == "DoubleQuote": - return f'"{html(e.content, k, i, 0, "")}"' - else: - return f'"{html(e.content, k, i, 0, "")}"' - - if isinstance(e, Group): - k.begingroup() - ret = html(e.content, k, i, indent_level, indent_str) - k.endgroup() - return ret - - if isinstance(e, Math): - formats = { - "DisplayMath": True, - "InlineMath": False - } - return indent_level*indent_str + k.render(e.text, {"displayMode": formats[e.format]}) - - if isinstance(e, RawInline): - if e.format == "html": - return e.text - else: - return "" - - if isinstance(e, RawBlock): - if e.format == "html": - return f'{e.text}\n' - else: - return "" - - - # Non-overriding elements, they get generated using the template at the end - # of this function - if isinstance(e, Header): - tag = "h"+str(e.level) - - if isinstance(e, Figure): - content_foot = html(e.caption, k, i, indent_level+1, indent_str) - - if isinstance(e, Caption): - tag = "figcaption" - - if isinstance(e, Link): - tag = "a" - attributes += f' href="{e.url}"' - if e.title: - attributes += f' title="{e.title}"' - - if isinstance(e, OrderedList): - tag = "ol" - if e.start and e.start != 1: - attributes += f' start="{e.start}"' - html_styles = { - "Decimal": "1", - "LowerRoman": "i", - 
"UpperRoman:": "I", - "LowerAlpha": "a", - "UpperAlpha": "A" - } - if e.style and e.style != "DefaultStyle": - attributes += f' type="{html_styles[e.style]}"' - # FIXME: Delimeter styles - - if isinstance(e, Table): - content_head = html(e.head, k, i, indent_level+1, indent_str) - content_foot = html(e.foot, k, i, indent_level+1, indent_str) - # FIXME: Fancy pandoc tables, using colspec - - if isinstance(e, TableCell): - tag = "td" - if e.colspan != 1: - attributes += f' colspan="{e.colspan}"' - if e.rowspan != 1: - attributes += f' rowspan="{e.rowspan}"' - aligns = { - "AlignLeft": "left", - "AlignRight": "right", - "AlignCenter": "center" - } - if e.alignment and e.alignment != "AlignDefault": - attributes += f' style="text-align: {aligns[e.alignment]}"' - - # The default which all non-overriding elements get generated by. This - # includes elements, which were not explicitly mentioned in this function, - # e. g. Strong - - if isinstance(e, Inline): - return f'<{tag}{attributes}>{content_head}{html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}' - - out_str = "" - if not isinstance(e, Plain): - out_str += f"{indent_level*indent_str}<{tag}{attributes}>\n" - out_str += content_head - if hasattr(e, "_content"): - if len(e.content) > 0 and isinstance(e.content[0], Inline): - out_str += (indent_level+1)*indent_str - out_str += html(e.content, k, i, indent_level+1, indent_str) - if hasattr(e, "text"): - out_str += e.text - out_str += f"{content_foot}\n" - if not isinstance(e, Plain): - out_str += f"{indent_level*indent_str}\n" - - return out_str - - diff --git a/src/formatitko/tex.py b/src/formatitko/tex.py deleted file mode 100644 index 74b6e6f..0000000 --- a/src/formatitko/tex.py +++ /dev/null @@ -1,270 +0,0 @@ -from panflute import * -import os -from typing import Union - -from .whitespace import NBSP -from .elements import FQuoted -from .util import inlinify -from .context import Group -from .images 
import ImageProcessor - -# Heavily inspired by: git://git.ucw.cz/labsconf2022.git -def tex(e: Union[Element, ListContainer], i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str: - - # `only` attribute which makes transformed elements appear only in tex - # output or html output - if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex": - return "" - - if isinstance(e, ListContainer): - return ''.join([tex(child, i, indent_level, indent_str) for child in e]) - - # Bits from which the final element output is built at the end of this - # function. Most elements override this by returning their own output. - content_foot = "" - content_head = "" - - arguments = "" - open = "{" - close = "}" - - tag = e.tag.lower() - - tags = { - Header: "h"+chr(64 + e.level) if isinstance(e, Header) else "", - } - if type(e) in tags: - tag = tags[type(e)] - - # These are also disabled in pandoc so they shouldn't appear in the AST at all. - not_implemented = { - Citation: True, - Cite: True, - Definition: True, - DefinitionItem: True, - DefinitionList: True - } - if type(e) in not_implemented: - return f'% FIXME: {type(e)}s not implemented \n' - - # Elements which can be represented by a simple string - simple_string = { - NBSP: "~", - Space: " ", - Null: "", - LineBreak: f"\\\\", - SoftBreak: f" ", - HorizontalRule: "\\hr\n\n" - } - if type(e) in simple_string: - return simple_string[type(e)] - - # Simplest basic elements - if isinstance(e, Str): - return e.text.replace(" ", "~") - - if isinstance(e, Para): - return tex(e.content, i, 0, "")+"\n\n" - - if isinstance(e, Span) or isinstance(e, Plain): - return tex(e.content, i, 0, "") - - # Overriding elements with their own returns - if isinstance(e, Image): - url = e.url - - # TODO: This should use OutputGenerator's get_image_processor_args - # Attributes → image processor args - additional_args = {} - if "file-width" in e.attributes: - additional_args["width"] = 
int(e.attributes["file-width"]) - if "file-height" in e.attributes: - additional_args["height"] = int(e.attributes["file-height"]) - if "file-quality" in e.attributes: - additional_args["quality"] = int(e.attributes["file-quality"]) - if "file-dpi" in e.attributes: - additional_args["dpi"] = int(e.attributes["file-dpi"]) - - # The directory of the current file, will also look for images there. - source_dir = e.attributes["source_dir"] - - _, ext = os.path.splitext(url) - ext = ext[1:] - - # Conversions between various formats. - if ext in ["pdf", "png", "jpeg"]: - # Even supported elements have to be 'converted' because the - # processing contains finding and moving them to the cache - # directory. - url = i.process_image(url, ext, source_dir, **additional_args) - elif ext in ["svg"]: - url = i.process_image(url, "pdf", source_dir, **additional_args) - elif ext in ["epdf"]: - url = i.process_image(url, "pdf", source_dir, **additional_args) - elif ext in ["jpg"]: - url = i.process_image(url, "jpeg", source_dir, **additional_args) - else: - url = i.process_image(url, "pdf", source_dir, **additional_args) - - url = i.find_image(url, [i.cache_dir]) - width = "" - if "width" in e.attributes: - width = e.attributes["width"] - # 50% → 0.5\hsize - if e.attributes["width"][-1] == "%": - width = str(int(e.attributes["width"][:-1])/100) + "\\hsize" - width = "width " + width - return f'\\image{{{width}}}{{{url}}}' - - if isinstance(e, FQuoted): - if e.style == "cs": - if e.quote_type == "SingleQuote": - return f'‚{tex(e.content, i, 0, "")}‘' - elif e.quote_type == "DoubleQuote": - return f'„{tex(e.content, i, 0, "")}“' - elif e.style == "en": - if e.quote_type == "SingleQuote": - return f'‘{tex(e.content, i, 0, "")}’' - elif e.quote_type == "DoubleQuote": - return f'“{tex(e.content, i, 0, "")}”' - else: - if e.quote_type == "SingleQuote": - return f'\'{tex(e.content, i, 0, "")}\'' - elif e.quote_type == "DoubleQuote": - return f'"{tex(e.content, i, 0, "")}"' - else: - return 
f'"{tex(e.content, i, 0, "")}"' - - if isinstance(e, Code): - return f'\\verb`{e.text.replace("`", "backtick")}`' - - if isinstance(e, Figure): - return f'\\figure{{{tex(e.content, i, indent_level+1, indent_str)}}}{{{tex(e.caption, i, indent_level+1, indent_str)}}}\n\n' - - # Figure caption - if isinstance(e, Caption): - if inlinify(e) is not None: - return f'\\figcaption{{{tex(e.content, i, 0, "")}}}' - - if isinstance(e, Math): - if e.format == "DisplayMath": - return f'$${e.text}$$\n' - else: - return f'${e.text}$' - - # Footnote - if isinstance(e, Note): - tag = "fn" - if inlinify(e) is not None: - return f'\\fn{{{tex(inlinify(e), i, 0, "")}}}' - - if isinstance(e, Table): - aligns = { - "AlignLeft": "\\quad#\\quad\\hfil", - "AlignRight": "\\quad\\hfil#\\quad", - "AlignCenter": "\\quad\\hfil#\\hfil\\quad", - "AlignDefault": "\\quad#\\quad\\hfil" - } - text = "\strut"+"&".join([aligns[col[0]] for col in e.colspec])+"\cr\n" - text += tex(e.head.content, i, 0, "") - text += "\\noalign{\\hrule}\n" - text += tex(e.content[0].content, i, 0, "") - text += "\\noalign{\\hrule}\n" - text += tex(e.foot.content, i, 0, "") - return "\\vskip1em\n\\halign{"+text+"}\n\\vskip1em\n" - # FIXME: Implement rowspan - - if isinstance(e, TableRow): - return "&".join([("\\multispan"+str(cell.colspan)+" " if cell.colspan > 1 else "")+tex(cell.content, i, 0, "") for cell in e.content])+"\cr\n" - - if isinstance(e, RawInline): - if e.format == "tex": - return e.text - else: - return "" - - if isinstance(e, RawBlock): - if e.format == "tex": - return f'{e.text}\n' - else: - return "" - - # See https://pandoc.org/MANUAL.html#line-blocks - if isinstance(e, LineBlock): - return f'{tex(e.content, i, indent_level+1, indent_str)}\n' - - if isinstance(e, LineItem): - return tex(e.content, i, 0, "") + ("\\\\\n" if e.next else "\n") - - if type(e) is Div: - return f'{tex(e.content, i, indent_level+1, indent_str)}' - - if isinstance(e, Doc): - return tex(e.content, i, indent_level, 
indent_str)+"\n\\bye" # Is having the \bye a bad idea here? - - - # Non-overriding elements, they get generated using the template at the end - # of this function - if isinstance(e, BulletList): - tag = "list" - open = "" - arguments = "{o}" - close = "\\endlist" - - elif isinstance(e, OrderedList): - tag = "list" - open = "" - styles = { - "DefaultStyle": "n", - "Decimal": "n", - "LowerRoman": "i", - "UpperRoman:": "I", - "LowerAlpha": "a", - "UpperAlpha": "A" - } - style = styles[e.style] - delimiters = { - "DefaultDelim": f"{style}.", - "Period": f"{style}.", - "OneParen": f"{style})", - "TwoParens": f"({style})" - } - style = delimiters[e.delimiter] - arguments = f"{{{style}}}" - close = "\\endlist" - # FIXME: Starting number of list - - elif isinstance(e, ListItem): - tag = ":" - - elif isinstance(e, Link): - if len(e.content) == 1 and isinstance(e.content[0], Str) and e.content[0].text == e.url: - tag = "url" - else: - tag = "linkurl" - arguments = f'{{{e.url}}}' - - elif isinstance(e, Group): - tag = "begingroup" - open = "" - if "lang" in e.metadata and e.metadata["lang"] is not None: - open = "\\language"+e.metadata["lang"] - close = "\\endgroup" - - # The default which all non-overriding elements get generated by. This - # includes elements, which were not explicitly mentioned in this function, - # e. g. Strong, Emph... 
- - if isinstance(e, Inline): - return f'\\{tag}{arguments}{open}{content_head}{tex(e.content, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}{close}' - - out_str = "" - out_str = f"\\{tag}{arguments}{open}\n" - out_str += content_head - if hasattr(e, "_content"): - out_str += tex(e.content, i, indent_level+1, indent_str) - if hasattr(e, "text"): - out_str += e.text - out_str += f"{content_foot}\n{close}\n\n" - - return out_str diff --git a/src/formatitko/transform.py b/src/formatitko/transform.py deleted file mode 100644 index 69679d9..0000000 --- a/src/formatitko/transform.py +++ /dev/null @@ -1,176 +0,0 @@ -from panflute import Element, Div, Span, Quoted, Image, CodeBlock, Str, MetaInlines, MetaString, MetaBool, RawBlock -import re -import os - -# Import local files -from .whitespace import Whitespace, NBSP, bavlna -from .util import nullify, import_md -from .context import Context, BlockGroup -from .command import Command, BlockCommand, InlineCommand -from .command_util import handle_command_define, parse_command -from .elements import FQuoted - - -import warnings -warnings.warn("The transform function has been deprecated, is left only for reference and will be removed in future commits. TransformProcessor should be used in its place.", DeprecationWarning) - -# This is where tha magic happens. This function transforms a single element, -# to transform the entire tree, panflute's walk should be used. -def transform(e: Element, c: Context) -> Element: - - warnings.warn("The transform function has been deprecated, is left only for reference and will be removed in future commits. TransformProcessor should be used in its place.", DeprecationWarning) - # Determine if this space should be non-breakable. See whitespace.py. - if isinstance(e, Whitespace) and bavlna(e, c): - e = NBSP() - - if hasattr(e, "attributes"): - # `if` attribute. Only show this element if flag is set. 
- if "if" in e.attributes: - if not c.is_flag_set(e.attributes["if"]): - return nullify(e) - # `ifn` attribute. Only show this element if flag is NOT set - if "ifn" in e.attributes: - if c.is_flag_set(e.attributes["ifn"]): - return nullify(e) - - # There are multiple ways to call a command so we turn it into a - # unified element first and then call it at the end. This handles the - # []{c=commandname} and - # :::{c=commandname} - # ::: - # syntax. - if (isinstance(e, Div) or isinstance(e, Span)) and "c" in e.attributes: - if isinstance(e, Div): - e = BlockCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) - else: - e = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) - - # Isolated subdocuments using Group and a different Context. Can be - # separate files (using attribute `partial`) or be inline using the - # following syntax: - # ```markdown {.group} - # * file content * - # ``` - # Both can contain their own metadata in a FrontMatter (YAML header) - if (isinstance(e, Div) and "partial" in e.attributes)\ - or (isinstance(e, CodeBlock) and "markdown" in e.classes and "group" in e.classes): - if isinstance(e, Div): - if not c.trusted: # If we're in an untrusted context, we shouldn't allow inclusion of files outside the PWD. 
- full_path = os.path.abspath(c.dir + "/" + e.attributes["partial"]) - pwd = os.path.abspath(".") - if os.path.commonpath([full_path, pwd]) != os.path.commonpath([pwd]): - return nullify(e) - text = open(c.dir + "/" + e.attributes["partial"], "r").read() - path = c.dir + "/" + e.attributes["partial"] - else: - text = e.text - path = c.path - if "type" in e.attributes and e.attributes["type"] in ["tex", "html"]: - e = RawBlock(text, e.attributes["type"]) - else: - includedDoc = import_md(text) - trusted = True - if "untrusted" in e.attributes and (e.attributes["untrusted"] == True or e.attributes["untrusted"] == 'True'): - trusted = False - if not c.trusted: - trusted = False - nContext = Context(includedDoc, path, c, trusted=trusted) - language = includedDoc.get_metadata("lang") - includedDoc = includedDoc.walk(transform, nContext) - e = BlockGroup(*includedDoc.content, context=nContext, metadata={"lang": language}) - - # Transform panflute's Quoted to custom FQuoted, see above. - if isinstance(e, Quoted): - quote_styles = { - "cs": "cs", - "en": "en", - "sk": "cs", - None: None - } - e = FQuoted(*e.content, quote_type=e.quote_type, style=quote_styles[c.get_metadata("lang")]) - - if isinstance(e, Image): - # Pass down the directory of the current source file for finding image - # files. - e.attributes["source_dir"] = c.dir - # Pass down "no-srcset" metadatum as attribute down to images. 
- if not "no-srcset" in e.attributes: - e.attributes["no-srcset"] = c.get_metadata("no-srcset") if c.get_metadata("no-srcset") is not None else False - - # Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks - if isinstance(e, CodeBlock): - if not "highlight" in e.attributes: - e.attributes["highlight"] = c.get_metadata("highlight") if c.get_metadata("highlight") is not None else True - if not "style" in e.attributes: - e.attributes["style"] = c.get_metadata("highlight-style") if c.get_metadata("highlight-style") is not None else "default" - e.attributes["noclasses"] = False - # I think this is supposed to enable inline styles for highlighting when the style differs from the document, but it clearly doesn't work. a) HTML_generator never accesses it and b) Only the top-level document contains a style so you have to ask the top level context, not the current context. - else: - e.attributes["noclasses"] = True - - # Execute python code inside source code block. Works the same as commands. - # Syntax: - # ```python {.run} - # print("woo") - # ``` - if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and "run" in e.classes: - if not c.trusted: - return nullify(e) - command_output = parse_command(e.text)(BlockCommand(), c) - e = BlockCommand().replaceSelf(*([] if command_output is None else command_output)) - e = e.walk(transform, c) - - # Command defines for calling using BlockCommand and InlineCommand. If - # redefine is used instead of define, the program doesn't check if the - # command already exists. 
- # Syntax: - # ```python {define=commandname} - # print(wooo) - # ``` - if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and hasattr(e, "attributes")\ - and ("define" in e.attributes or "redefine" in e.attributes): - if not c.trusted: - return nullify(e) - e = handle_command_define(e, c) - - ## Shorthands - # Shorter (and sometimes the only) forms of certain features - if isinstance(e, Span) and len(e.content) == 1 and isinstance(e.content[0], Str): - ## Handle special command shorthand [!commandname]{} - if re.match(r"^![\w.]+$", e.content[0].text): - e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]}) - - ## Handle import [#path/file.md]{} - # This is the exact opposite of partials. We take the commands, flags - # and metadata but drop the content. - elif re.match(r"^#.+$", e.content[0].text): - importedDoc = import_md(open(c.dir + "/" + e.content[0].text[1:], "r").read()) - importedDoc.walk(transform, c) - return nullify(e) - - ## Handle metadata print [$key1.key2]{} - # This is a shorthand for just printing the content of some metadata. - elif re.match(r"^\$[\w.]+$", e.content[0].text): - val = c.get_metadata(e.content[0].text[1:], False) - if isinstance(val, MetaInlines): - e = Span(*val.content) - e = e.walk(transform, c) - elif isinstance(val, MetaString): - e = Span(Str(val.string)) - elif isinstance(val, MetaBool): - e = Span(Str(str(val.boolean))) - else: - raise TypeError(f"Cannot print value of metadatum '{e.content[0].text[1:]}' of type '{type(val)}'") - - ## Execute commands - # panflute's walk function transforms the children first, then the root - # element, so the content the command receives is already transformed. - # The output from the command is then transformed manually again. 
- if isinstance(e, Command): - if not c.get_command(e.attributes["c"]): - raise NameError(f"Command not defined '{e.attributes['c']}'.") - command_output = c.get_command(e.attributes["c"])(e, c) - e = e.replaceSelf(*command_output) - e = e.walk(transform, c) - - return e From ce0a3e1192c662219b49380069b4c345291b8c1b Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 17 Feb 2024 21:34:11 +0100 Subject: [PATCH 06/22] Actually deprecate the old stuff (forgot a few imports). --- src/formatitko/formatitko.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index 9ee76ee..139d69a 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -7,12 +7,9 @@ import subprocess import shutil # Import local files -from .transform import transform from .util import import_md from .context import Context, BlockGroup from .katex import KatexClient -from .html import html -from .tex import tex from .images import ImageProcessor from .output_generator import OutputGenerator, FormatitkoRecursiveError from .html_generator import HTMLGenerator From 1950ab56e67045c592e53b23343fc521b7488046 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 17 Feb 2024 23:46:46 +0100 Subject: [PATCH 07/22] Errors now print full path from CWD, not just filename --- src/formatitko/output_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 4f3cd24..332bb25 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -36,7 +36,7 @@ class FormatitkoRecursiveError(Exception): print(*args, file=sys.stderr, **kwargs) def print_filename_recursive(context: Context): - return context.filename +\ + return context.path +\ ((" (included from " + print_filename_recursive(context.parent) + ")") if context.parent else "") eprint(f"Error occured in file {print_filename_recursive(self.context)} in ", 
end="") line = None From b9c193d45f6706c34aa228eff7e26f67959329b0 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 17 Feb 2024 23:47:30 +0100 Subject: [PATCH 08/22] Implemented image processor namespaces --- src/formatitko/context.py | 4 + src/formatitko/formatitko.py | 4 +- src/formatitko/html_generator.py | 30 +++--- src/formatitko/images.py | 171 +++++++++++++++++++++++-------- src/formatitko/tex_generator.py | 20 ++-- 5 files changed, 163 insertions(+), 66 deletions(-) diff --git a/src/formatitko/context.py b/src/formatitko/context.py index caa321c..31c1587 100644 --- a/src/formatitko/context.py +++ b/src/formatitko/context.py @@ -28,6 +28,8 @@ class Context: path: str dir: str filename: str + root_dir: str # Absolute path to the dir of the file formátítko was called on + rel_dir: str # Relative path to the current dir from the root dir def __init__(self, doc: Doc, path: str, parent: Union['Context', None]=None, trusted: bool=True): self.parent = parent @@ -37,6 +39,8 @@ class Context: self.path = path self.dir = os.path.dirname(path) if os.path.dirname(path) != "" else "." 
self.filename = os.path.basename(path) + self.root_dir = parent.root_dir if parent else os.path.abspath(self.dir) + self.rel_dir = os.path.relpath(self.dir, self.root_dir) if self.get_metadata("flags", immediate=True) is None: self.set_metadata("flags", {}) diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index 139d69a..486c5a4 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -10,7 +10,7 @@ import shutil from .util import import_md from .context import Context, BlockGroup from .katex import KatexClient -from .images import ImageProcessor +from .images import ImageProcessor, ImageProcessorNamespace from .output_generator import OutputGenerator, FormatitkoRecursiveError from .html_generator import HTMLGenerator from .transform_processor import TransformProcessor @@ -62,7 +62,7 @@ def main(): e.pretty_print() # Initialize the image processor (this just keeps some basic state) - imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs) + imageProcessor = ImageProcessor({"": ImageProcessorNamespace(args.img_public_dir, args.img_web_path, args.img_cache_dir, args.img_lookup_dirs, True)}) if args.output_html is not None: # Initialize KaTeX client (this runs the node app and connects to a unix socket) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index d564044..54002ce 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -17,7 +17,7 @@ from .whitespace import NBSP from .context import Group, BlockGroup, InlineGroup from .output_generator import OutputGenerator from .katex import KatexClient -from .images import ImageProcessor +from .images import ImageProcessor, ImageProcessorNamespaceSearcher from .util import inlinify class HTMLGenerator(OutputGenerator): @@ -137,8 +137,12 @@ class HTMLGenerator(OutputGenerator): additional_args = self.get_image_processor_args(e.attributes) - # The directory 
of the current file, will also look for images there. + # The directory of the current file relative to the current working directory source_dir = self.context.dir + # The directory of the current file relative to the md file we were called on + rel_dir = self.context.rel_dir + + searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir) _, ext = os.path.splitext(url) ext = ext[1:] @@ -148,16 +152,16 @@ class HTMLGenerator(OutputGenerator): # Even supported elements have to be 'converted' because the # processing contains finding and moving them to the output # directory. - url = self.imageProcessor.process_image(url, ext, source_dir, **additional_args) + url = self.imageProcessor.process_image(url, ext, searcher, **additional_args) elif ext in ["pdf", "epdf","asy"]: # Only relevant for when these were PNGs, leaving this here for future reference. # if not "dpi" in additional_args: # additional_args["dpi"] = 300 - url = self.imageProcessor.process_image(url, "svg", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "svg", searcher, **additional_args) elif ext in ["jpg"]: - url = self.imageProcessor.process_image(url, "jpeg", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args) else: - url = self.imageProcessor.process_image(url, "png", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "png", searcher, **additional_args) # Srcset generation - multiple alternative sizes of images browsers can # choose from. 
@@ -168,19 +172,19 @@ class HTMLGenerator(OutputGenerator): # This is inspired by @vojta001's blogPhoto shortcode he made for # patek.cz: # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html - width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir]) + width, height = self.imageProcessor.get_image_size(searcher.find_image_in_dir(url, searcher.get_cache_dir())) sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality) for size in sizes: if width <= size[0] and height <= size[1]: - srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w')) + srcset.append((f'{searcher.get_web_path()}/{url}', f'{width}w')) break quality = size[2] if ext == "jpeg" else None - cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=size[0], height=size[1], quality=quality) - self.imageProcessor.publish_image(cache_img) - srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{size[0]}w')) + cache_img = self.imageProcessor.process_image(url, ext, searcher.get_cache_searcher(), width=size[0], height=size[1], quality=quality) + searcher.publish_image(cache_img) + srcset.append((f'{searcher.get_web_path()}/{cache_img}', f'{size[0]}w')) - self.imageProcessor.publish_image(url) - url = self.imageProcessor.web_path + "/" + url + searcher.publish_image(url) + url = searcher.get_web_path() + "/" + url attributes = self.common_attributes(e) if "width" in e.attributes: diff --git a/src/formatitko/images.py b/src/formatitko/images.py index 73220dc..0f65b7b 100644 --- a/src/formatitko/images.py +++ b/src/formatitko/images.py @@ -4,46 +4,161 @@ import shutil import subprocess from PIL import Image + class FileInWrongDirError(Exception): pass + class ConversionProgramError(Exception): pass + class InkscapeError(ConversionProgramError): pass + class ImageMagickError(ConversionProgramError): pass + class AsyError(ConversionProgramError): 
pass - -class ImageProcessor: +class ImageProcessorNamespace: public_dir: str cache_dir: str lookup_dirs: list[str] web_path: str + include_src: bool - def __init__(self, public_dir: str, web_path: str, cache_dir: str, *lookup_dirs: list[str]): + def __init__(self, public_dir: str, web_path: str, cache_dir: str, lookup_dirs: list[str], include_src: bool): self.public_dir = public_dir self.cache_dir = cache_dir self.lookup_dirs = lookup_dirs self.web_path = web_path if web_path[-1] != "/" else web_path[:-1] + self.include_src = include_src if not os.path.exists(self.public_dir): os.mkdir(self.public_dir) if not os.path.exists(self.cache_dir): os.mkdir(self.cache_dir) - def process_image(self, input_filename: str, format: str, source_dir: str, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str: + +class ImageProcessorSearcher: + def get_lookup_dirs(self) -> list[str]: + return [] + + def get_cache_dir(self) -> str: + return "" + + def get_public_dir(self) -> str: + return "" + + def get_web_path(self) -> str: + return "" + + def find_image_in_dir(self, input_filename: str, dir: str) -> Union[str, None]: + if os.path.isfile(dir + "/" + input_filename): + return dir + "/" + input_filename + else: + return None + + def find_image(self, input_filename: str) -> Union[str, None]: + for dir in self.get_lookup_dirs(): + image = self.find_image_in_dir(input_filename, dir) + if image: + return image + return None + + def publish_image(self, target_name, relative: bool=True) -> str: + cache_path = self.get_cache_dir() + "/" + target_name + if not os.path.isfile(cache_path): + raise FileNotFoundError(f'Image {target_name} not cached') + target_path = self.get_public_dir() + "/" + target_name + try: + if os.path.exists(target_path): + if os.path.getmtime(cache_path) > os.path.getmtime(target_path): + os.remove(target_path) + os.link(cache_path, target_path) + else: + os.link(cache_path, target_path) + except OSError as 
e: + if e.errno == 18: # Invalid cross-device link: cache and public dirs are on different devices, don't hardlink, copy + shutil.copyfile(cache_path, target_path) + else: + raise e + return target_name if relative else target_path + + + +class ImageProcessorCacheSearcher(ImageProcessorSearcher): + cache_dir: str + + def __init__(self, cache_dir: str): + self.cache_dir = cache_dir + + def get_lookup_dirs(self) -> list[str]: + return [self.cache_dir] + + def get_cache_dir(self) -> str: + return self.cache_dir + + def get_public_dir(self) -> str: + return "" + + def get_web_path(self) -> str: + return "" + + def publish_image(self, target_name, relative: bool=True) -> str: + raise NotImplementedError(); + +class ImageProcessorNamespaceSearcher(ImageProcessorSearcher): + namespace: ImageProcessorNamespace + rel_dir: str + source_dir: str + + def __init__(self, namespace: ImageProcessorNamespace, rel_dir: str, source_dir: str): + self.namespace = namespace + self.rel_dir = rel_dir + self.source_dir = source_dir + + def get_lookup_dirs(self) -> list[str]: + return self.namespace.lookup_dirs + ([self.source_dir] if self.namespace.include_src else []) + + def transform_path(self, path: str) -> str: + return path.replace("$dir", self.rel_dir) + + def get_cache_dir(self) -> str: + return self.transform_path(self.namespace.cache_dir) + + def get_public_dir(self) -> str: + return self.transform_path(self.namespace.public_dir) + + def get_web_path(self) -> str: + return self.transform_path(self.namespace.web_path) + + def get_cache_searcher(self) -> ImageProcessorCacheSearcher: + return ImageProcessorCacheSearcher(self.get_cache_dir()) + +class ImageProcessor: + namespaces: dict[str, ImageProcessorNamespace] + + def __init__(self, namespaces: dict[str, ImageProcessorNamespace]): + self.namespaces = namespaces + + def get_namespace_by_path(self, path: str) -> ImageProcessorNamespace: + return self.namespaces[path.split(":")[0] if ":" in path else ""] + + def 
get_searcher_by_path(self, path: str, rel_dir: str, source_dir: str) -> ImageProcessorNamespaceSearcher: + return ImageProcessorNamespaceSearcher(self.get_namespace_by_path(path), rel_dir, source_dir) + + def process_image(self, input_filename: str, format: str, searcher: ImageProcessorSearcher, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str: name = os.path.basename(input_filename) base, ext = os.path.splitext(name) ext = ext[1:] - full_path = self.find_image(input_filename, [source_dir]) + full_path = searcher.find_image(input_filename) if full_path is None: - raise FileNotFoundError(f'Image {input_filename} not found in {self.lookup_dirs} or {source_dir}.') + raise FileNotFoundError(f'Image {input_filename} not found in {searcher.get_lookup_dirs()}.') if format == "jpg": format = "jpeg" @@ -51,7 +166,7 @@ class ImageProcessor: # Locate all dependencies deps_full = [full_path] for dep in deps: - dep_full_path = self.find_image(dep, [source_dir]) + dep_full_path = searcher.find_image(dep) if dep_full_path is None: raise FileNotFoundError(f'Image dependency {dep} not found.') deps_full.append(dep_full_path) @@ -65,7 +180,7 @@ class ImageProcessor: if quality is not None: suffix += f'_q{quality}' target_name = base+suffix+"."+format - target_path = self.cache_dir + "/" + target_name + target_path = searcher.get_cache_dir() + "/" + target_name # Only regenerate if the file doesn't already exist and no dependencies are newer if not os.path.isfile(target_path) or self.is_outdated(target_path, deps_full): @@ -80,13 +195,13 @@ class ImageProcessor: # Try to find the converted filename in lookup_dirs, if you find # it, don't convert, just copy. 
- elif self.find_image(target_name, [source_dir]) is not None and not self.is_outdated(self.find_image(target_name, [source_dir]), deps): - shutil.copyfile(self.find_image(target_name, [source_dir]), target_path) + elif searcher.find_image(target_name) is not None and not self.is_outdated(searcher.find_image(target_name), deps): + shutil.copyfile(searcher.find_image(target_name), target_path) # Process asymptote elif ext == "asy": # Collect dependencies - deps_dir = self.cache_dir + "/" + name + "_deps" + deps_dir = searcher.get_cache_dir() + "/" + name + "_deps" if not os.path.isdir(deps_dir): os.mkdir(deps_dir) for dep_full in deps_full: @@ -96,7 +211,7 @@ class ImageProcessor: dpi_arg = ['-render', str(dpi/72)] if dpi is not None else [] if subprocess.run(['asy', name, '-o', target_name, '-f', format, *dpi_arg], cwd=deps_dir).returncode != 0: raise AsyError(f"Could not convert '{full_path}' to '{format}'") - shutil.move(deps_dir + "/" + target_name, self.cache_dir + "/" + target_name) + shutil.move(deps_dir + "/" + target_name, searcher.get_cache_dir() + "/" + target_name) # Convert SVGs using inkscape elif ext == "svg": @@ -124,37 +239,7 @@ class ImageProcessor: return True return False - def publish_image(self, target_name, relative: bool=True) -> str: - cache_path = self.cache_dir + "/" + target_name - if not os.path.isfile(cache_path): - raise FileNotFoundError(f'Image {target_name} not cached') - target_path = self.public_dir + "/" + target_name - try: - if os.path.exists(target_path): - if os.path.getmtime(cache_path) > os.path.getmtime(target_path): - os.remove(target_path) - os.link(cache_path, target_path) - else: - os.link(cache_path, target_path) - except OSError as e: - if e.errno == 18: # Invalid cross-device link: cache and public dirs are on different devices, don't hardlink, copy - shutil.copyfile(cache_path, target_path) - else: - raise e - return target_name if relative else target_path - - def get_image_size(self, input_filename: str, 
additional_dirs: list[str]=[]) -> tuple[int, int]: - full_path = self.find_image(input_filename, additional_dirs) - if full_path is None: - raise FileNotFoundError(f'Image {input_filename} not found.') + def get_image_size(self, full_path: str) -> tuple[int, int]: # Getting image size using ImageMagick is slow. VERY return Image.open(full_path).size - - def find_image(self, input_filename: str, additional_dirs: list[str]=[]) -> Union[str, None]: - for dir in [*self.lookup_dirs, *additional_dirs]: - if os.path.isfile(dir + "/" + input_filename): - return dir + "/" + input_filename - - - diff --git a/src/formatitko/tex_generator.py b/src/formatitko/tex_generator.py index c97b90d..570478e 100644 --- a/src/formatitko/tex_generator.py +++ b/src/formatitko/tex_generator.py @@ -8,7 +8,7 @@ from typing import Union import os from .output_generator import OutputGenerator -from .images import ImageProcessor +from .images import ImageProcessor, ImageProcessorNamespaceSearcher from .whitespace import NBSP from .elements import FQuoted @@ -111,8 +111,12 @@ class UCWTexGenerator(OutputGenerator): additional_args = self.get_image_processor_args(e.attributes) - # The directory of the current file, will also look for images there. + # The directory of the current file relative to the current working directory source_dir = self.context.dir + # The directory of the current file relative to the md file we were called on + rel_dir = self.context.rel_dir + + searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir) _, ext = os.path.splitext(url) ext = ext[1:] @@ -122,17 +126,17 @@ class UCWTexGenerator(OutputGenerator): # Even supported elements have to be 'converted' because the # processing contains finding and moving them to the cache # directory. 
- url = self.imageProcessor.process_image(url, ext, source_dir, **additional_args) + url = self.imageProcessor.process_image(url, ext, searcher, **additional_args) elif ext in ["svg"]: # FIXME - url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args) elif ext in ["epdf"]: - url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args) elif ext in ["jpg"]: - url = self.imageProcessor.process_image(url, "jpeg", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args) else: - url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args) + url = self.imageProcessor.process_image(url, "pdf", searcher, **additional_args) - url = self.imageProcessor.find_image(url, [self.imageProcessor.cache_dir]) + url = searcher.get_cache_searcher().find_image(url) width = "" if "width" in e.attributes: width = e.attributes["width"] From caef60d4726cd1813025d26c9ef2e09e20f59bd0 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sun, 18 Feb 2024 00:27:13 +0100 Subject: [PATCH 09/22] TP is now passed to commands. Output from the TP is not transformed automatically and has to be done manually from the commands. 
--- src/formatitko/command_env.py | 1 + src/formatitko/command_util.py | 2 +- src/formatitko/context.py | 2 +- src/formatitko/output_generator.py | 2 +- src/formatitko/transform_processor.py | 6 +++--- test/test.md | 15 +++++++++++++-- 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/formatitko/command_env.py b/src/formatitko/command_env.py index 4775db4..a943d6f 100644 --- a/src/formatitko/command_env.py +++ b/src/formatitko/command_env.py @@ -5,4 +5,5 @@ from formatitko.util import parse_string from formatitko.context import Context from formatitko.command import Command +from .nop_processor import NOPProcessor from panflute import Element diff --git a/src/formatitko/command_util.py b/src/formatitko/command_util.py index f11a154..e4af09a 100644 --- a/src/formatitko/command_util.py +++ b/src/formatitko/command_util.py @@ -15,7 +15,7 @@ def parse_command(code: str) -> CommandCallable: indented_code_lines = [] for line in code_lines: indented_code_lines.append(("\t" if tabs else " ")+line) - code = "def command(element: Command, context: Context) -> list[Element]:\n"+"\n".join(indented_code_lines) + code = "def command(element: Command, context: Context, processor: NOPProcessor) -> list[Element]:\n"+"\n".join(indented_code_lines) env = {**command_env.__dict__} exec(code, env) return env["command"] diff --git a/src/formatitko/context.py b/src/formatitko/context.py index 31c1587..11f0b94 100644 --- a/src/formatitko/context.py +++ b/src/formatitko/context.py @@ -7,7 +7,7 @@ import warnings from .command import Command -CommandCallable = Callable[[Command, 'Context'], list[Element]] # This is here because of a wild circular import dependency between many functions and classes +CommandCallable = Callable[[Command, 'Context', 'NOPProcessor'], list[Element]] # This is here because of a wild circular import dependency between many functions and classes # This class is used to keep state while transforming the document using # transform.py. 
For the context to be available to the html and TeX generators, diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 332bb25..daa30af 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -49,7 +49,7 @@ class FormatitkoRecursiveError(Exception): eprint('on line: "' + stringify(line).strip() + '"', end="") eprint() eprint("in element: " + str(self.elements[0]).replace("\n", "\\n")) - sys.tracebacklimit = 0 + sys.tracebacklimit = 2 raise self.__cause__ from None diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index 60cba5a..d3b02b4 100644 --- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -229,7 +229,7 @@ class TransformProcessor(NOPProcessor): if "python" in e.classes and "run" in e.classes: if not self.context.trusted: return nullify(e) - command_output = parse_command(e.text)(BlockCommand(), self.context) + command_output = parse_command(e.text)(BlockCommand(), self.context, self) e = BlockCommand().replaceSelf(*([] if command_output is None else command_output)) return self.transform(e) @@ -252,9 +252,9 @@ class TransformProcessor(NOPProcessor): def transform_Command(self, e: Command) -> Union[Div, Span]: if not self.context.get_command(e.attributes["c"]): raise NameError(f"Command not defined '{e.attributes['c']}'.") - command_output = self.context.get_command(e.attributes["c"])(e, self.context) + command_output = self.context.get_command(e.attributes["c"])(e, self.context, self) e = e.replaceSelf(*([] if command_output is None else command_output)) - return self.transform(e) + return e def transform_Whitespace(self, e: Whitespace) -> Whitespace: if bavlna(e, self.context): diff --git a/test/test.md b/test/test.md index 87929b8..80be954 100644 --- a/test/test.md +++ b/test/test.md @@ -198,13 +198,24 @@ ii. 
wym bro ```python {define=bash} import subprocess c = subprocess.run(["bash", "-c", element.text], stdout=subprocess.PIPE, check=True, encoding="utf-8") -return [pf.Para(pf.Str(c.stdout))] +return [pf.CodeBlock(c.stdout)] ``` ```bash {c=bash} -cat /etc/hostname +cat /etc/os-release ``` +::: {.group lang=cs} +```python {.run} +return processor.transform([ + *parse_string("V "), + pf.Link(pf.Str("odevzdávátku"), url="https://ksp.mff.cuni.cz/z/odevzdavatko/"), + *parse_string(" si necháte vygenerovat vstupy a odevzdáte příslušné výstupy. Záleží jen na vás, jak výstupy vyrobíte.") + +]) +``` +::: + ```html
hahahahaah From 5c066d46afb04844531c384380f682247075ac39 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Tue, 20 Feb 2024 01:00:36 +0100 Subject: [PATCH 10/22] =?UTF-8?q?Fix=20handlov=C3=A1n=C3=AD=20cest=20obr?= =?UTF-8?q?=C3=A1zk=C5=AF,=20kdy=C5=BE=20jsou=20namespacov=C3=A9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/formatitko/html_generator.py | 1 + src/formatitko/images.py | 3 +++ src/formatitko/tex_generator.py | 1 + 3 files changed, 5 insertions(+) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index 54002ce..46e54a4 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -143,6 +143,7 @@ class HTMLGenerator(OutputGenerator): rel_dir = self.context.rel_dir searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir) + url = self.imageProcessor.get_path_without_namespace(url) _, ext = os.path.splitext(url) ext = ext[1:] diff --git a/src/formatitko/images.py b/src/formatitko/images.py index 0f65b7b..93bc2bc 100644 --- a/src/formatitko/images.py +++ b/src/formatitko/images.py @@ -149,6 +149,9 @@ class ImageProcessor: def get_namespace_by_path(self, path: str) -> ImageProcessorNamespace: return self.namespaces[path.split(":")[0] if ":" in path else ""] + def get_path_without_namespace(self, path: str) -> str: + return ":".join(path.split(":")[1:]) + def get_searcher_by_path(self, path: str, rel_dir: str, source_dir: str) -> ImageProcessorNamespaceSearcher: return ImageProcessorNamespaceSearcher(self.get_namespace_by_path(path), rel_dir, source_dir) diff --git a/src/formatitko/tex_generator.py b/src/formatitko/tex_generator.py index 570478e..143e4a9 100644 --- a/src/formatitko/tex_generator.py +++ b/src/formatitko/tex_generator.py @@ -117,6 +117,7 @@ class UCWTexGenerator(OutputGenerator): rel_dir = self.context.rel_dir searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir) + url = 
self.imageProcessor.get_path_without_namespace(url) _, ext = os.path.splitext(url) ext = ext[1:] From 93f59493610cec80705e994ec5a17fdae5120163 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Tue, 20 Feb 2024 12:13:06 +0100 Subject: [PATCH 11/22] =?UTF-8?q?P=C5=99id=C3=A1na=20data=20na=20kontextu?= =?UTF-8?q?=20nez=C3=A1visl=C3=A1=20na=20docu.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/formatitko/context.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/formatitko/context.py b/src/formatitko/context.py index 11f0b94..0b885a3 100644 --- a/src/formatitko/context.py +++ b/src/formatitko/context.py @@ -23,6 +23,7 @@ CommandCallable = Callable[[Command, 'Context', 'NOPProcessor'], list[Element]] class Context: parent: Union["Context", None] _commands: dict[str, Union[CommandCallable, None]] + _data: dict[str, object] doc: Doc trusted: bool path: str @@ -34,6 +35,7 @@ class Context: def __init__(self, doc: Doc, path: str, parent: Union['Context', None]=None, trusted: bool=True): self.parent = parent self._commands = {} + self._data = {} self.doc = doc self.trusted = trusted self.path = path @@ -113,6 +115,40 @@ class Context: else: self.set_metadata(key, data) + def get_data(self, key: str, immediate: bool=False): + data = self._data + keys = key.split(".") + try: + for k in keys: + data = data[k] + return data + except KeyError: + if self.parent and not immediate: + return self.parent.get_data(key) + else: + return None + + def set_data(self, key: str, value: object): + data = self._data + keys = key.split(".") + for k in keys[:-1]: + try: + data = data[k] + except KeyError: + data[k] = {} + data = data[k] + data[keys[-1]] = value + + def unset_data(self, key: str): + if key == "": + self._doc = {} + data = self._doc + keys = key.split(".") + for k in keys[:-1]: + data = data[k] + del data[keys[-1]] + + # This is a custom element which creates \begingroup \endgroup 
groups in TeX From 6180b581b85678d9f216a3768878befea21f46df Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Tue, 20 Feb 2024 18:23:51 +0100 Subject: [PATCH 12/22] KaTeX server is now in separate repo. --- .gitmodules | 3 ++ src/formatitko/katex-server/index.mjs | 0 src/formatitko/katex-server/package-lock.json | 7 +++-- src/formatitko/katex-server/package.json | 5 +++- src/formatitko/katex-server/yarn.lock | 15 ++++++++++ src/formatitko/katex.py | 28 +++++++++++-------- 6 files changed, 43 insertions(+), 15 deletions(-) mode change 100644 => 100755 src/formatitko/katex-server/index.mjs create mode 100644 src/formatitko/katex-server/yarn.lock diff --git a/.gitmodules b/.gitmodules index 611d4fc..4f484b8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "ucwmac"] path = ucwmac url = git://git.ucw.cz/ucwmac.git +[submodule "src/formatitko/katex-server"] + path = src/formatitko/katex-server + url = https://gitea.ks.matfyz.cz/KSP/formatitko-katex-server.git diff --git a/src/formatitko/katex-server/index.mjs b/src/formatitko/katex-server/index.mjs old mode 100644 new mode 100755 diff --git a/src/formatitko/katex-server/package-lock.json b/src/formatitko/katex-server/package-lock.json index 3b5bee3..05246dc 100644 --- a/src/formatitko/katex-server/package-lock.json +++ b/src/formatitko/katex-server/package-lock.json @@ -1,15 +1,18 @@ { - "name": "ksp-katex-server", + "name": "formatitko-katex-server", "version": "1.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ksp-katex-server", + "name": "formatitko-katex-server", "version": "1.0.0", "license": "ISC", "dependencies": { "katex": "^0.16.3" + }, + "bin": { + "formatitko-katex-server": "index.mjs" } }, "node_modules/commander": { diff --git a/src/formatitko/katex-server/package.json b/src/formatitko/katex-server/package.json index 3d68121..adbb8ed 100644 --- a/src/formatitko/katex-server/package.json +++ b/src/formatitko/katex-server/package.json @@ -1,8 +1,11 @@ { - 
"name": "ksp-katex-server", + "name": "formatitko-katex-server", "version": "1.0.0", "description": "", "main": "index.mjs", + "bin": { + "formatitko-katex-server":"index.mjs" + }, "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, diff --git a/src/formatitko/katex-server/yarn.lock b/src/formatitko/katex-server/yarn.lock new file mode 100644 index 0000000..2760613 --- /dev/null +++ b/src/formatitko/katex-server/yarn.lock @@ -0,0 +1,15 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + +commander@^8.3.0: + version "8.3.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-8.3.0.tgz#4837ea1b2da67b9c616a67afbb0fafee567bca66" + integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww== + +katex@^0.16.3: + version "0.16.9" + resolved "https://registry.yarnpkg.com/katex/-/katex-0.16.9.tgz#bc62d8f7abfea6e181250f85a56e4ef292dcb1fa" + integrity sha512-fsSYjWS0EEOwvy81j3vRA8TEAhQhKiqO+FQaKWp0m39qwOzHVBgAUBIXWj1pB+O2W3fIpNa6Y9KSKCVbfPhyAQ== + dependencies: + commander "^8.3.0" diff --git a/src/formatitko/katex.py b/src/formatitko/katex.py index 39e521f..f81db24 100644 --- a/src/formatitko/katex.py +++ b/src/formatitko/katex.py @@ -3,6 +3,7 @@ import subprocess import tempfile import json import os +import shutil class KatexError(Exception): pass @@ -20,8 +21,10 @@ class KatexClient: _socket_file: str _temp_dir: tempfile.TemporaryDirectory[str] _connected: bool + _katex_server_path: str - def __init__(self, socket: str=None, connect: bool=True): + def __init__(self, socket: str=None, connect: bool=True, katex_server_path: str=None): + self._katex_server_path = katex_server_path if socket is not None: self._socket_file = socket else: @@ -38,20 +41,21 @@ class KatexClient: self._temp_dir = tempfile.TemporaryDirectory(prefix='formatitko') self._socket_file = self._temp_dir.name + "/katex-socket" - srcdir = os.path.dirname(os.path.realpath(__file__)) + if 
self._katex_server_path is None: + + srcdir = os.path.dirname(os.path.realpath(__file__)) - # Test if `node_modules` directory exists and if not, run `npm install` - if not os.path.isdir(srcdir + "/katex-server/node_modules"): - print("Installing node dependencies for the first time...") - try: - subprocess.run(["npm", "install"], cwd=srcdir+"/katex-server", check=True) - except subprocess.CalledProcessError as e: - if e.returncode == 127: + # Test if `node_modules` directory exists and if not, run `npm install` + if not os.path.isdir(srcdir + "/katex-server/node_modules"): + print("Installing node dependencies for the first time...") + npm = shutil.which("npm") or shutil.which("yarnpkg") + if npm is None: raise NPMNotFoundError("npm not found. Node.js is required to use KaTeX.") - else: - raise e + subprocess.run([npm, "install"], cwd=srcdir+"/katex-server", check=True) + + self._katex_server_path = srcdir + "/katex-server/index.mjs" - self._server_process = subprocess.Popen(["node", srcdir + "/katex-server/index.mjs", self._socket_file], stdout=subprocess.PIPE) + self._server_process = subprocess.Popen(["node", self._katex_server_path, self._socket_file], stdout=subprocess.PIPE) ok = self._server_process.stdout.readline() if ok != b"OK\n": From 7b81919914f27b4d64a0b6f0303401b3be0727e4 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Tue, 20 Feb 2024 18:27:34 +0100 Subject: [PATCH 13/22] OK the submodule was broken. 
--- .gitmodules | 2 +- src/formatitko/katex-server | 1 + src/formatitko/katex-server/.gitignore | 1 - src/formatitko/katex-server/README.md | 1 - src/formatitko/katex-server/index.js | 1 - src/formatitko/katex-server/index.mjs | 131 ------------------ src/formatitko/katex-server/package-lock.json | 42 ------ src/formatitko/katex-server/package.json | 17 --- src/formatitko/katex-server/yarn.lock | 15 -- 9 files changed, 2 insertions(+), 209 deletions(-) create mode 160000 src/formatitko/katex-server delete mode 100644 src/formatitko/katex-server/.gitignore delete mode 100644 src/formatitko/katex-server/README.md delete mode 100644 src/formatitko/katex-server/index.js delete mode 100755 src/formatitko/katex-server/index.mjs delete mode 100644 src/formatitko/katex-server/package-lock.json delete mode 100644 src/formatitko/katex-server/package.json delete mode 100644 src/formatitko/katex-server/yarn.lock diff --git a/.gitmodules b/.gitmodules index 4f484b8..59ee1f9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = git://git.ucw.cz/ucwmac.git [submodule "src/formatitko/katex-server"] path = src/formatitko/katex-server - url = https://gitea.ks.matfyz.cz/KSP/formatitko-katex-server.git + url = https://gitea.ks.matfyz.cz:/KSP/formatitko-katex-server diff --git a/src/formatitko/katex-server b/src/formatitko/katex-server new file mode 160000 index 0000000..909d075 --- /dev/null +++ b/src/formatitko/katex-server @@ -0,0 +1 @@ +Subproject commit 909d075b9d37bdf19f5e9d382d6a441125fe7aa4 diff --git a/src/formatitko/katex-server/.gitignore b/src/formatitko/katex-server/.gitignore deleted file mode 100644 index 3c3629e..0000000 --- a/src/formatitko/katex-server/.gitignore +++ /dev/null @@ -1 +0,0 @@ -node_modules diff --git a/src/formatitko/katex-server/README.md b/src/formatitko/katex-server/README.md deleted file mode 100644 index 082cd55..0000000 --- a/src/formatitko/katex-server/README.md +++ /dev/null @@ -1 +0,0 @@ -This was made by Standa Lukeš @exyi diff 
--git a/src/formatitko/katex-server/index.js b/src/formatitko/katex-server/index.js deleted file mode 100644 index d6754c8..0000000 --- a/src/formatitko/katex-server/index.js +++ /dev/null @@ -1 +0,0 @@ -console.log(require('katex').renderToString('\\frac{2a}{b}')) diff --git a/src/formatitko/katex-server/index.mjs b/src/formatitko/katex-server/index.mjs deleted file mode 100755 index 2c1c05c..0000000 --- a/src/formatitko/katex-server/index.mjs +++ /dev/null @@ -1,131 +0,0 @@ -// KaTeX rendering server -// Listens on unix socket, path is provided as first argument -// Expects JSON lines, each line is a query with the following schema: -// { -// formulas: [ -// { -// tex: string, -// options?: object -// } -// ], -// options?: object -// } - -// see https://katex.org/docs/options.html for list of available options -// If options formulas[].options field is used, the global options field is ignored. - -// For each line, returns one JSON line with the following schema: -// { -// results: [ -// { html?: string } | { error?: string } -// ] -// } | { error?: string } - - -// If one formula is invalid, the error in results is used -// If the entire query is invalid (couldn't parse JSON, for example), the outer error field is used - - -import katex from 'katex' -import net from 'net' -import * as readline from 'readline' - -const myArgs = process.argv.slice(2) - -const unixSocketPath = myArgs[0] -if (!unixSocketPath) { - console.error('you must specify socket path') - process.exit(1) -} - -// This server listens on a Unix socket at /var/run/mysocket -var unixServer = net.createServer(handleClient); -unixServer.listen(unixSocketPath); -console.log("OK") - -function handleExit(signal) { - // unixServer.emit('close') - unixServer.close(function () { - - }); - process.exit(0); // put this into the callback to avoid closing open connections -} -process.on('SIGINT', handleExit); -process.on('SIGQUIT', handleExit); -process.on('SIGTERM', handleExit); -process.on('exit', 
handleExit); - -const defaultOptions = {} - -/** - * @param {net.Socket} socket - * @returns {Promise} - * */ -function socketWrite(socket, data) { - return new Promise((resolve, reject) => { - socket.write(data, (err) => { - if (err) { - reject(err) - } else { - resolve() - } - }) - }) -} - -/** - * @param {net.Socket} client - * */ -async function handleClient(client) { - const rl = readline.createInterface({ input: client }) - - /* Added by GS: A stack of katex's `macros` objects, each group inherits - * the one from the parent group and can add its own stuff without - * affecting the parent. - */ - let macroStack = [{}] - for await (const line of rl) { - try { - // The custom commands for pushing and popping the macro stack. - if (line === "begingroup") { - // Copy the current state of macros and push it onto the stack. - macroStack.push({...macroStack.slice(-1)[0]}) - continue - } else if (line === "endgroup") { - macroStack.pop() - continue - } else if (line === "init") { - macroStack = [{}] - continue - } - const query = JSON.parse(line) - const results = [] - for (const input of query.formulas) { - const options = input.options ?? query.options ?? defaultOptions - // Add macros from the macros option - if (options.macros) { - for (const macro of Object.keys(options.macros)) { - macroStack.slice(-1)[macro] = options.macros[macro] - } - } - options.macros = macroStack.slice(-1)[0] - // Enforce globalGroup option, katex then saves created macros - // into the options.macros object. - options.globalGroup = true - try { - const html = katex.renderToString(input.tex, options) - results.push({ html }) - } catch (e) { - results.push({ error: String(e) }) - } - } - await socketWrite(client, JSON.stringify({ results }, null, query.debug ? 
' ' : undefined)) - await socketWrite(client, '\n') - } catch (e) { - console.error(e) - await socketWrite(client, JSON.stringify({ error: String(e) })) - await socketWrite(client, '\n') - } - } -} - diff --git a/src/formatitko/katex-server/package-lock.json b/src/formatitko/katex-server/package-lock.json deleted file mode 100644 index 05246dc..0000000 --- a/src/formatitko/katex-server/package-lock.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "name": "formatitko-katex-server", - "version": "1.0.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "formatitko-katex-server", - "version": "1.0.0", - "license": "ISC", - "dependencies": { - "katex": "^0.16.3" - }, - "bin": { - "formatitko-katex-server": "index.mjs" - } - }, - "node_modules/commander": { - "version": "8.3.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz", - "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==", - "engines": { - "node": ">= 12" - } - }, - "node_modules/katex": { - "version": "0.16.3", - "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.3.tgz", - "integrity": "sha512-3EykQddareoRmbtNiNEDgl3IGjryyrp2eg/25fHDEnlHymIDi33bptkMv6K4EOC2LZCybLW/ZkEo6Le+EM9pmA==", - "funding": [ - "https://opencollective.com/katex", - "https://github.com/sponsors/katex" - ], - "dependencies": { - "commander": "^8.0.0" - }, - "bin": { - "katex": "cli.js" - } - } - } -} diff --git a/src/formatitko/katex-server/package.json b/src/formatitko/katex-server/package.json deleted file mode 100644 index adbb8ed..0000000 --- a/src/formatitko/katex-server/package.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "formatitko-katex-server", - "version": "1.0.0", - "description": "", - "main": "index.mjs", - "bin": { - "formatitko-katex-server":"index.mjs" - }, - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "author": "", - "license": "ISC", - "dependencies": { - "katex": "^0.16.3" 
- } -} diff --git a/src/formatitko/katex-server/yarn.lock b/src/formatitko/katex-server/yarn.lock deleted file mode 100644 index 2760613..0000000 --- a/src/formatitko/katex-server/yarn.lock +++ /dev/null @@ -1,15 +0,0 @@ -# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. -# yarn lockfile v1 - - -commander@^8.3.0: - version "8.3.0" - resolved "https://registry.yarnpkg.com/commander/-/commander-8.3.0.tgz#4837ea1b2da67b9c616a67afbb0fafee567bca66" - integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww== - -katex@^0.16.3: - version "0.16.9" - resolved "https://registry.yarnpkg.com/katex/-/katex-0.16.9.tgz#bc62d8f7abfea6e181250f85a56e4ef292dcb1fa" - integrity sha512-fsSYjWS0EEOwvy81j3vRA8TEAhQhKiqO+FQaKWp0m39qwOzHVBgAUBIXWj1pB+O2W3fIpNa6Y9KSKCVbfPhyAQ== - dependencies: - commander "^8.3.0" From e2f2c4f5f05d9d3694bb863c6195f52a8802a0c4 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Tue, 20 Feb 2024 20:09:58 +0100 Subject: [PATCH 14/22] =?UTF-8?q?Je=C5=A1t=C4=9B=20v=C3=ADce=20magie,=20kt?= =?UTF-8?q?er=C3=A1=20se=20sna=C5=BE=C3=AD=20zachra=C5=88ovat=20blokov?= =?UTF-8?q?=C3=A9=20v=C3=BDstupy=20z=20p=C5=99=C3=ADkaz=C5=AF,=20kter?= =?UTF-8?q?=C3=A9=20byly=20zavol=C3=A1ny=20jako=20Span.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/formatitko/command.py | 2 +- src/formatitko/output_generator.py | 2 +- src/formatitko/transform_processor.py | 27 ++++++++++++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/formatitko/command.py b/src/formatitko/command.py index 943c304..88686a0 100644 --- a/src/formatitko/command.py +++ b/src/formatitko/command.py @@ -16,7 +16,7 @@ class InlineCommand(Span, Command): if len(content) == 1 and (isinstance(content[0], Para) or isinstance(content[0], Plain)): return Span(*content[0].content) else: - raise InlineError(f"The command {self.attributes['c']} returned multiple Paragraphs and must be 
executed using `::: {{c={self.attributes['c']}}}\\n:::`.\n\n{content}") + return Div(*content) pass class BlockCommand(Div, Command): diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index daa30af..332bb25 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -49,7 +49,7 @@ class FormatitkoRecursiveError(Exception): eprint('on line: "' + stringify(line).strip() + '"', end="") eprint() eprint("in element: " + str(self.elements[0]).replace("\n", "\\n")) - sys.tracebacklimit = 2 + sys.tracebacklimit = 0 raise self.__cause__ from None diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index d3b02b4..d327663 100644 --- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -18,7 +18,7 @@ from .context import Group, InlineGroup, BlockGroup from .util import nullify, import_md from .context import Context, CommandCallable from .whitespace import Whitespace, bavlna -from .command import BlockCommand, InlineCommand, CodeCommand, Command +from .command import BlockCommand, InlineCommand, CodeCommand, Command, InlineError from .command_util import handle_command_define, parse_command from .nop_processor import NOPProcessor, ELCl, DoubleDocError @@ -56,6 +56,18 @@ class TransformProcessor(NOPProcessor): return nullify(e) return e + def transform_ListContainer(self, e: ListContainer) -> ListContainer: + try: + return super().transform_ListContainer(e) + except TypeError as err: + names = [] + for el in e: + if hasattr(el, "attributes") and "c" in el.attributes: + names.append(el.attributes["c"]) + if len(names) > 0: + raise InlineError(f"The command{'s' if len(names) > 1 else ''} {names[0] if len(names) == 1 else names} was called in an Inline way but returned Block content. 
Put it in a paragraph alone or execute it as a Div using: \n::: {{c={names[0] if len(names) == 1 else ''}}}\n:::") + else: + raise err def transform_Doc(self, e: Doc) -> Doc: if self.context is not None: @@ -96,6 +108,19 @@ class TransformProcessor(NOPProcessor): else: return BlockGroup(*content, context=new_context) + def transform_Para(self, e: Para) -> Union[Para, Div]: + if len(e.content) == 1 and isinstance(e.content[0], Span): + # If the span turns out to be a command, it might return a Div. We should then replace ourselves with the Div + span = e.content[0] + span = self.transform(span) + if isinstance(span, Div): + return span + else: + e.content[0] = span + return super().transform_Para(e) + else: + return super().transform_Para(e) + def transform_Div(self, e: Div) -> Union[Div, Group, Null, RawBlock]: e.content = self.transform(e.content) From 3ce0b5037b762bcfb6ebf37776b36b7979b5a6f8 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Wed, 21 Feb 2024 15:04:25 +0100 Subject: [PATCH 15/22] Some changes to allow commands to touch the rest of the tree they're currently in. This shall only be done on parts of the tree not yet transformed, otherwise, very weird things can happen. 
--- src/formatitko/nop_processor.py | 8 ++++++-- src/formatitko/output_generator.py | 4 ++-- src/formatitko/transform_processor.py | 27 +++++++++++++++++---------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/formatitko/nop_processor.py b/src/formatitko/nop_processor.py index bc20a2a..f20ff1b 100644 --- a/src/formatitko/nop_processor.py +++ b/src/formatitko/nop_processor.py @@ -127,13 +127,17 @@ class NOPProcessor: raise FormatitkoRecursiveError(e, self.context) from err def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]: - for i in range(len(e)): + i = 0 + while i < len(e): # The length of the list can change mid-transformation, so we need to check the length each time e[i] = self.transform(e[i]) + i-=-1 return e def transform_ListContainer(self, e: ListContainer) -> ListContainer: - for i in range(len(e)): + i = 0 + while i < len(e): # The length of the list can change mid-transformation, so we need to check the length each time e[i] = self.transform(e[i]) + i-=-1 return e diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 332bb25..7ba1278 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -31,7 +31,7 @@ class FormatitkoRecursiveError(Exception): def add_element(self, e: Union[Element, ListContainer, list[Union[Element, ListContainer]]]): self.elements.append(e) - def pretty_print(self): + def pretty_print(self, tracebacklimit: int=0): def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) @@ -49,7 +49,7 @@ class FormatitkoRecursiveError(Exception): eprint('on line: "' + stringify(line).strip() + '"', end="") eprint() eprint("in element: " + str(self.elements[0]).replace("\n", "\\n")) - sys.tracebacklimit = 0 + sys.tracebacklimit = tracebacklimit raise self.__cause__ from None diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index d327663..4560e0c 100644 
--- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -3,6 +3,7 @@ from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quote from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead from panflute import TableRow, TableCell, Caption, Doc from panflute import MetaValue +from panflute.containers import attach from typing import Union, Callable from types import ModuleType @@ -75,8 +76,8 @@ class TransformProcessor(NOPProcessor): self.context = Context(e, self.root_file_path) for module, module_name in self._command_modules: self.context.add_commands_from_module(module, module_name) - e.content = self.transform(e.content) e.content = [BlockGroup(*e.content, context=self.context)] + e.content = self.transform(e.content) return e @@ -136,8 +137,9 @@ class TransformProcessor(NOPProcessor): # Commands can be called multiple ways, this handles the following syntax: # :::{c=commandname} # ::: - e = BlockCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) - return self.transform(e) + command = BlockCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) + attach(command, e.parent, e.location, e.index) + return self.transform(command) if "partial" in e.attributes: # `partial` attribute @@ -195,14 +197,16 @@ class TransformProcessor(NOPProcessor): if "c" in e.attributes: # Commands can be called multiple ways, this handles the following syntax: # []{c=commandname} and - e = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) - return self.transform(e) + command = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, 
attributes=e.attributes) + attach(command, e.parent, e.location, e.index) + return self.transform(command) if len(e.content) == 1 and isinstance(e.content[0], Str): ## Handle special command shorthand [!commandname]{} if re.match(r"^![\w.]+$", e.content[0].text): - e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]}) - return self.transform(e) + command = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]}) + attach(command, e.parent, e.location, e.index) + return self.transform(command) ## Handle import [#ksp_formatitko as ksp]{}, [#ksp_formatitko]{type=module} or [#path/file.md]{type=md} # Import a python module as commands (type=module, the default) or @@ -255,8 +259,9 @@ class TransformProcessor(NOPProcessor): if not self.context.trusted: return nullify(e) command_output = parse_command(e.text)(BlockCommand(), self.context, self) - e = BlockCommand().replaceSelf(*([] if command_output is None else command_output)) - return self.transform(e) + command = BlockCommand().replaceSelf(*([] if command_output is None else command_output)) + attach(command, e.parent, e.location, e.index) + return self.transform(command) if "python" in e.classes and ("define" in e.attributes or "redefine" in e.attributes): if not self.context.trusted: @@ -264,7 +269,9 @@ class TransformProcessor(NOPProcessor): return handle_command_define(e, self.context) if "c" in e.attributes: - return self.transform(CodeCommand(e.text, identifier=e.identifier, classes=e.classes, attributes=e.attributes)) + command = CodeCommand(e.text, identifier=e.identifier, classes=e.classes, attributes=e.attributes) + attach(command, e.parent, e.location, e.index) + return self.transform(command) # Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks # OG now has Context so this is not needed per se, but I'm keeping this here for the handling of attribute > 
context > default value From 7f3490536eef56bb7c90492b9861cd3411aae2c0 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Wed, 21 Feb 2024 16:27:04 +0100 Subject: [PATCH 16/22] Attach Groups correctly in the tree, Images now support height in HTML. --- src/formatitko/html_generator.py | 2 ++ src/formatitko/transform_processor.py | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index 46e54a4..3373a25 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -190,6 +190,8 @@ class HTMLGenerator(OutputGenerator): attributes = self.common_attributes(e) if "width" in e.attributes: attributes["width"] = e.attributes["width"] + if "height" in e.attributes: + attributes["height"] = e.attributes["height"] if e.title: attributes["alt"] = e.title diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index 4560e0c..28231cd 100644 --- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -99,15 +99,17 @@ class TransformProcessor(NOPProcessor): e.attributes["no-srcset"] = self.context.get_metadata("no-srcset") if self.context.get_metadata("no-srcset") is not None else False return e - def create_Group(self, *content, new_context: Context, inline: bool=False) -> Group: + def create_Group(self, *content, new_context: Context, replaced:Element, inline: bool=False) -> Group: old_context = self.context self.context = new_context - content = self.transform([*content]) - self.context = old_context if inline: - return InlineGroup(*content, context=new_context) + g = InlineGroup(*content, context=new_context) else: - return BlockGroup(*content, context=new_context) + g = BlockGroup(*content, context=new_context) + attach(g, replaced.parent, replaced.location, replaced.index) + g = self.transform(g) + self.context = old_context + return g def transform_Para(self, e: Para) -> 
Union[Para, Div]: if len(e.content) == 1 and isinstance(e.content[0], Span): @@ -131,7 +133,7 @@ class TransformProcessor(NOPProcessor): new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted) for attribute, value in e.attributes.items(): new_context.set_metadata(attribute, value) - return self.create_Group(*e.content, new_context=new_context) + return self.create_Group(*e.content, replaced=e, new_context=new_context) if "c" in e.attributes: # Commands can be called multiple ways, this handles the following syntax: @@ -160,7 +162,7 @@ class TransformProcessor(NOPProcessor): trusted = False if not self.context.trusted: trusted = False - return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, path, self.context, trusted=trusted)) + return self.create_Group(*includedDoc.content, replaced=e, new_context=Context(includedDoc, path, self.context, trusted=trusted)) elif e.attributes["type"] in ["tex", "html"]: return RawBlock(text, e.attributes["type"]) @@ -192,7 +194,7 @@ class TransformProcessor(NOPProcessor): new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted) for attribute, value in e.attributes.items(): new_context.set_metadata(attribute, value) - return self.create_Group(*e.content, new_context=new_context, inline=True) + return self.create_Group(*e.content, replaced=e, new_context=new_context, inline=True) if "c" in e.attributes: # Commands can be called multiple ways, this handles the following syntax: @@ -253,7 +255,7 @@ class TransformProcessor(NOPProcessor): def transform_CodeBlock(self, e: CodeBlock) -> Union[CodeBlock, Div, Null]: if "markdown" in e.classes and "group" in e.classes: includedDoc = import_md(e.text) - return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, self.context.path, self.context, self.context.trusted)) + return self.create_Group(*includedDoc.content, replaced=e, new_context=Context(includedDoc, self.context.path, 
self.context, self.context.trusted)) if "python" in e.classes and "run" in e.classes: if not self.context.trusted: From 72b9bc7bf15d31ce1b8315ec002c51acc415b32a Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Wed, 21 Feb 2024 21:43:49 +0100 Subject: [PATCH 17/22] Add special StandaloneHTMLGenerator. Also handle prepending the document in a more pandocy way. --- src/formatitko/formatitko.py | 27 ++++++++++++++++--------- src/formatitko/html_generator.py | 32 ++++++++++++++++++++++++++++++ src/formatitko/images.py | 2 ++ src/formatitko/output_generator.py | 3 ++- test/test-top.html | 8 -------- test/test.md | 11 +++++----- 6 files changed, 60 insertions(+), 23 deletions(-) delete mode 100644 test/test-top.html diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index 486c5a4..4038b9d 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -8,11 +8,10 @@ import shutil # Import local files from .util import import_md -from .context import Context, BlockGroup from .katex import KatexClient from .images import ImageProcessor, ImageProcessorNamespace from .output_generator import OutputGenerator, FormatitkoRecursiveError -from .html_generator import HTMLGenerator +from .html_generator import HTMLGenerator, StandaloneHTMLGenerator from .transform_processor import TransformProcessor from .pandoc_processor import PandocProcessor from .tex_generator import UCWTexGenerator @@ -27,6 +26,7 @@ def main(): parser.add_argument("-c", "--img-cache-dir", help="Directory to cache processed images and intermediate products. The program will overwrite files, whose dependencies are newer.", default="cache") parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/") parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.") + parser.add_argument("-s", "--output-standalone-html", help="The Standalone HTML file to write into. 
A full page is generated instead of just a fragment.") parser.add_argument("-t", "--output-tex", help="The TEX file to write into.") parser.add_argument("-m", "--output-md", help="The Markdown file to write into. (Uses pandoc to generate markdown)") parser.add_argument("-j", "--output-json", help="The JSON file to dump the pandoc-compatible AST into.") @@ -35,6 +35,7 @@ def main(): parser.add_argument("-k", "--katex-socket", help="The KaTeX server socket filename obtained by running with `--katex-server`.") parser.add_argument("input_filename", help="The markdown file to process.", nargs="?" if "--katex-server" in sys.argv else None) parser.add_argument("--debug", action='store_true') + parser.add_argument("--traceback-limit", help="Traceback limit for when errors happen, defaults to 0, as it is only useful for internal debugging.", default=0) args = parser.parse_args() if args.katex_server: @@ -54,12 +55,12 @@ def main(): try: OutputGenerator(sys.stdout).generate(doc) except FormatitkoRecursiveError as e: - e.pretty_print() + e.pretty_print(tracebacklimit=args.traceback_limit) try: doc = TransformProcessor(args.input_filename).transform(doc) except FormatitkoRecursiveError as e: - e.pretty_print() + e.pretty_print(tracebacklimit=args.traceback_limit) # Initialize the image processor (this just keeps some basic state) imageProcessor = ImageProcessor({"": ImageProcessorNamespace(args.img_public_dir, args.img_web_path, args.img_cache_dir, args.img_lookup_dirs, True)}) @@ -71,15 +72,23 @@ def main(): try: HTMLGenerator(file, katexClient, imageProcessor).generate(doc) except FormatitkoRecursiveError as e: - e.pretty_print() + e.pretty_print(tracebacklimit=args.traceback_limit) + + if args.output_standalone_html is not None: + # Initialize KaTeX client (this runs the node app and connects to a unix socket) + with KatexClient(socket=args.katex_socket) as katexClient: + with open(args.output_standalone_html, "w") as file: + try: + StandaloneHTMLGenerator(file, 
katexClient, imageProcessor).generate(doc) + except FormatitkoRecursiveError as e: + e.pretty_print(tracebacklimit=args.traceback_limit) if args.output_tex is not None: with open(args.output_tex, "w") as file: try: UCWTexGenerator(file, imageProcessor).generate(doc) except FormatitkoRecursiveError as e: - e.pretty_print() - + e.pretty_print(tracebacklimit=args.traceback_limit) if args.output_md is not None: with open(args.output_md, "w") as file: @@ -96,7 +105,7 @@ def main(): try: UCWTexGenerator(file, imageProcessor).generate(doc) except FormatitkoRecursiveError as e: - e.pretty_print() + e.pretty_print(tracebacklimit=args.traceback_limit) filename = fd.name else: filename = args.output_tex @@ -109,7 +118,7 @@ def main(): try: OutputGenerator(sys.stdout).generate(doc) except FormatitkoRecursiveError as e: - e.pretty_print() + e.pretty_print(tracebacklimit=args.traceback_limit) if __name__ == "__main__": diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index 3373a25..28f49de 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -20,6 +20,7 @@ from .katex import KatexClient from .images import ImageProcessor, ImageProcessorNamespaceSearcher from .util import inlinify + class HTMLGenerator(OutputGenerator): imageProcessor: ImageProcessor katexClient: KatexClient @@ -317,3 +318,34 @@ class HTMLGenerator(OutputGenerator): def generate_DefinitionList(self, e: DefinitionList): self.writeln("") + + +class StandaloneHTMLGenerator(HTMLGenerator): + def generate_Doc(self, e: Doc): + self.writeraw("") + self.writeln(self.start_tag("html", attributes={"lang": e.get_metadata("lang", None, True)})) + self.writeln(self.start_tag("head")) + self.indent_more() + self.writeln(self.single_tag("meta", attributes={"charset": "utf-8"})) + self.writeln(self.single_tag("meta", attributes={"viewport": "width=device-width, initial-scale=1.0"})) + self.writeln(self.single_tag("link", attributes={"href": 
"https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css", "integrity":"sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0", "crossorigin":"anonymous"})) + if "title" in e.metadata: + self.write(self.start_tag("title")) + self.generate(e.metadata["title"]) + self.write(self.end_tag("title")) + self.endln() + if "html-head-includes" in e.metadata: + self.generate(e.metadata["html-head-includes"]) + self.indent_less() + self.writeln(self.end_tag("head")) + self.writeln(self.start_tag("body")) + self.indent_more() + super().generate_Doc(e) + self.indent_less() + self.writeln(self.end_tag("body")) + self.writeln(self.end_tag("html")) + + + + + diff --git a/src/formatitko/images.py b/src/formatitko/images.py index 93bc2bc..d7741f0 100644 --- a/src/formatitko/images.py +++ b/src/formatitko/images.py @@ -150,6 +150,8 @@ class ImageProcessor: return self.namespaces[path.split(":")[0] if ":" in path else ""] def get_path_without_namespace(self, path: str) -> str: + if len(path.split(":")) <= 1: + return path return ":".join(path.split(":")[1:]) def get_searcher_by_path(self, path: str, rel_dir: str, source_dir: str) -> ImageProcessorNamespaceSearcher: diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 7ba1278..71dbebf 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -478,12 +478,13 @@ class OutputGenerator: self.generate_simple_tag(e) def generate_Doc(self, e: Doc): + if "header-includes" in e.metadata: # This is the pandoc way of doing things + self.generate(e.metadata["header-includes"]) if "header_content" in e.metadata: self.generate(e.metadata["header_content"]) self.generate_simple_tag(e) if "footer_content" in e.metadata: self.generate(e.metadata["footer_content"]) - def generate_BlockGroup(self, e: BlockGroup): self.generate_simple_tag(e) diff --git a/test/test-top.html b/test/test-top.html deleted file mode 100644 index 0cea640..0000000 --- 
a/test/test-top.html +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/test/test.md b/test/test.md index 80be954..563687c 100644 --- a/test/test.md +++ b/test/test.md @@ -3,12 +3,13 @@ title: 'Wooooo a title' subtitle: 'A subtitle' are_we_there_yet: False lang: "en" +header-includes: | + --- -:::: {.header_content} -::: {partial="test-top.html" type="html"} -::: -:::: - [#test-files/test-import.md]{type=md} [#test.json]{type=metadata key=orgs} From a1c439c32e66cbbcf9892050a4c1f9899670494d Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Wed, 21 Feb 2024 23:34:23 +0100 Subject: [PATCH 18/22] Minor fixes, typos, generating of MetaValues, fixed error where KeyErrors from the inside of the tree would get eaten. --- src/formatitko/html_generator.py | 2 +- src/formatitko/nop_processor.py | 4 +++- src/formatitko/output_generator.py | 25 +++++++++++++++++++------ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index 28f49de..4089a69 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -328,7 +328,7 @@ class StandaloneHTMLGenerator(HTMLGenerator): self.indent_more() self.writeln(self.single_tag("meta", attributes={"charset": "utf-8"})) self.writeln(self.single_tag("meta", attributes={"viewport": "width=device-width, initial-scale=1.0"})) - self.writeln(self.single_tag("link", attributes={"href": "https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css", "integrity":"sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0", "crossorigin":"anonymous"})) + self.writeln(self.single_tag("link", attributes={"rel": "stylesheet", "href": "https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css", "integrity":"sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0", "crossorigin":"anonymous"})) if "title" in e.metadata: self.write(self.start_tag("title")) self.generate(e.metadata["title"]) diff --git 
a/src/formatitko/nop_processor.py b/src/formatitko/nop_processor.py index f20ff1b..8218b2e 100644 --- a/src/formatitko/nop_processor.py +++ b/src/formatitko/nop_processor.py @@ -111,10 +111,12 @@ class NOPProcessor: e = transformer(e) try: - e = self.TYPE_DICT[type(e)](e) + method = self.TYPE_DICT[type(e)] except KeyError: raise self.UnknownElementError(type(e)) + e = method(e) + for transformer in self.get_posttransformers(): e = transformer(e) diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index 71dbebf..f2e0853 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -157,9 +157,10 @@ class OutputGenerator: self.generate_MetaList(e) else: try: - self.TYPE_DICT_MISC[type(e)](e) + method = self.TYPE_DICT_MISC[type(e)] except KeyError as err: raise UnknownElementError(type(e)) from err + method(e) if isinstance(e, Group): self.context = old_context except FormatitkoRecursiveError as err: @@ -292,9 +293,10 @@ class OutputGenerator: def generate_MetaValue(self, e: MetaValue): try: - self.TYPE_DICT_META[type(e)](e) + method = self.TYPE_DICT_META[type(e)] except KeyError: self.generate(e.content) + method(e) def generate_MetaBlocks(self, e: MetaBlocks): self.generate(e.content) @@ -303,16 +305,23 @@ class OutputGenerator: self.generate(e.content) def generate_MetaBool(self, e: MetaBool): - self.generate_simple_tag(e) + if e.boolean: + self.write("True") + else: + self.write("False") def generate_MetaMap(self, e: MetaMap): self.generate_simple_tag(e) def generate_MetaString(self, e: MetaString): - self.generate_simple_tag(e) + self.write(e.text) def generate_Inline(self, e: Inline): - self.TYPE_DICT_INLINE[type(e)](e) + try: + method = self.TYPE_DICT_INLINE[type(e)] + except KeyError as err: + raise UnknownElementError(type(e)) from err + method(e) def generate_Str(self, e: Str): self.write(self.escape_special_chars(e.text)) @@ -413,7 +422,11 @@ class OutputGenerator: def generate_Block(self, e: 
Block): - self.TYPE_DICT_BLOCK[type(e)](e) + try: + method = self.TYPE_DICT_BLOCK[type(e)] + except KeyError as err: + raise UnknownElementError(type(e)) from err + method(e) # Block elements From cd07b3abf8152942c474c5125b7d502d4bedf6a9 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Thu, 22 Feb 2024 11:50:50 +0100 Subject: [PATCH 19/22] Creating of dirs in namespaces only when they're accessed (this is needed because until then, we don't know what $dir is). Also telling pandoc to strip comments. --- src/formatitko/images.py | 17 ++++++++++------- src/formatitko/util.py | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/formatitko/images.py b/src/formatitko/images.py index d7741f0..6dcf52e 100644 --- a/src/formatitko/images.py +++ b/src/formatitko/images.py @@ -38,11 +38,6 @@ class ImageProcessorNamespace: self.lookup_dirs = lookup_dirs self.web_path = web_path if web_path[-1] != "/" else web_path[:-1] self.include_src = include_src - if not os.path.exists(self.public_dir): - os.mkdir(self.public_dir) - if not os.path.exists(self.cache_dir): - os.mkdir(self.cache_dir) - class ImageProcessorSearcher: def get_lookup_dirs(self) -> list[str]: @@ -96,6 +91,8 @@ class ImageProcessorCacheSearcher(ImageProcessorSearcher): def __init__(self, cache_dir: str): self.cache_dir = cache_dir + if not os.path.exists(self.cache_dir): + os.makedirs(self.cache_dir, exist_ok=True) def get_lookup_dirs(self) -> list[str]: return [self.cache_dir] @@ -129,10 +126,16 @@ class ImageProcessorNamespaceSearcher(ImageProcessorSearcher): return path.replace("$dir", self.rel_dir) def get_cache_dir(self) -> str: - return self.transform_path(self.namespace.cache_dir) + cache_dir = self.transform_path(self.namespace.cache_dir) + if not os.path.exists(cache_dir): + os.makedirs(cache_dir, exist_ok=True) + return cache_dir def get_public_dir(self) -> str: - return self.transform_path(self.namespace.public_dir) + public_dir = self.transform_path(self.namespace.public_dir) + if
not os.path.exists(public_dir): + os.makedirs(public_dir, exist_ok=True) + return public_dir def get_web_path(self) -> str: return self.transform_path(self.namespace.web_path) diff --git a/src/formatitko/util.py b/src/formatitko/util.py index f4eb6b4..0cf614b 100644 --- a/src/formatitko/util.py +++ b/src/formatitko/util.py @@ -37,7 +37,7 @@ def parse_string(s: str) -> list[Union[Str, Space]]: # we ever want to disable or enable some of panflute's markdown extensions, # this is the place to do it. def import_md(s: str, standalone: bool=True) -> Union[Doc, list[Element]]: - return convert_text(s, standalone=standalone, input_format="markdown-definition_lists-latex_macros") + return convert_text(s, standalone=standalone, input_format="markdown-definition_lists-latex_macros", extra_args=["--strip-comments"]) def import_md_list(s: str) -> list[Element]: return import_md(s, standalone=False) From f14f28d3a4b80608816f306ec658caed29174eb6 Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Thu, 22 Feb 2024 14:02:32 +0100 Subject: [PATCH 20/22] Added dependency printing --- src/formatitko/context.py | 20 +++++++++++++++++++- src/formatitko/formatitko.py | 7 +++++++ src/formatitko/html_generator.py | 1 + src/formatitko/images.py | 6 +++++- src/formatitko/output_generator.py | 1 + src/formatitko/tex_generator.py | 1 + src/formatitko/transform_processor.py | 12 +++++++++--- 7 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/formatitko/context.py b/src/formatitko/context.py index 0b885a3..a4563bc 100644 --- a/src/formatitko/context.py +++ b/src/formatitko/context.py @@ -3,7 +3,6 @@ from panflute import Doc, Element, Div, Span from typing import Union, Callable from types import ModuleType import os -import warnings from .command import Command @@ -31,6 +30,7 @@ class Context: filename: str root_dir: str # Absolute path to the dir of the file formátítko was called on rel_dir: str # Relative path to the current dir from the root dir + deps: set[str] def 
__init__(self, doc: Doc, path: str, parent: Union['Context', None]=None, trusted: bool=True): self.parent = parent @@ -43,6 +43,8 @@ class Context: self.filename = os.path.basename(path) self.root_dir = parent.root_dir if parent else os.path.abspath(self.dir) self.rel_dir = os.path.relpath(self.dir, self.root_dir) + self.deps = set() + self.add_dep(path) if self.get_metadata("flags", immediate=True) is None: self.set_metadata("flags", {}) @@ -148,8 +150,24 @@ class Context: data = data[k] del data[keys[-1]] + def get_deps(self) -> list[str]: + if self.parent is not None: + return self.parent.get_deps() + else: + return self.deps + + def add_dep(self, dep: str): + self.get_deps().add(os.path.abspath(dep)) + + def add_deps(self, deps: list[str]): + self.get_deps().update([os.path.abspath(path) for path in deps]) +def get_context_from_doc(doc: Doc) -> Context: + if len(doc.content) == 1 and isinstance(doc.content[0], Group): + return doc.content[0].context + else: + return None # This is a custom element which creates \begingroup \endgroup groups in TeX # and also causes KaTeX math blocks to be isolated in a similar way. diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index 4038b9d..3e764ba 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -15,6 +15,7 @@ from .html_generator import HTMLGenerator, StandaloneHTMLGenerator from .transform_processor import TransformProcessor from .pandoc_processor import PandocProcessor from .tex_generator import UCWTexGenerator +from .context import get_context_from_doc from panflute import convert_text @@ -36,6 +37,7 @@ def main(): parser.add_argument("input_filename", help="The markdown file to process.", nargs="?" 
if "--katex-server" in sys.argv else None) parser.add_argument("--debug", action='store_true') parser.add_argument("--traceback-limit", help="Traceback limit for when errors happen, defaults to 0, as it is only useful for internal debugging.", default=0) + parser.add_argument("--deps", help="File to write list of dependencies to. May depend on output formats used.") args = parser.parse_args() if args.katex_server: @@ -113,6 +115,11 @@ def main(): subprocess.run(["pdfcsplain", "-halt-on-error", "-output-directory="+outdir.name, "-jobname=formatitko", filename], check=True) shutil.move(outdir.name+"/formatitko.pdf", args.output_pdf) + if args.deps is not None: + with open(args.deps, "w") as file: + for dep in get_context_from_doc(doc).get_deps(): + file.write(dep + "\n") + if args.debug: print("-----------------------------------") try: diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index 4089a69..280c60e 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -137,6 +137,7 @@ class HTMLGenerator(OutputGenerator): url = e.url additional_args = self.get_image_processor_args(e.attributes) + additional_args["context"] = self.context # The directory of the current file relative to the current working directory source_dir = self.context.dir diff --git a/src/formatitko/images.py b/src/formatitko/images.py index 6dcf52e..c447c30 100644 --- a/src/formatitko/images.py +++ b/src/formatitko/images.py @@ -4,6 +4,8 @@ import shutil import subprocess from PIL import Image +from .context import Context + class FileInWrongDirError(Exception): pass @@ -160,7 +162,7 @@ class ImageProcessor: def get_searcher_by_path(self, path: str, rel_dir: str, source_dir: str) -> ImageProcessorNamespaceSearcher: return ImageProcessorNamespaceSearcher(self.get_namespace_by_path(path), rel_dir, source_dir) - def process_image(self, input_filename: str, format: str, searcher: ImageProcessorSearcher, width: int=None, height:int=None, 
quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str: + def process_image(self, input_filename: str, format: str, searcher: ImageProcessorSearcher, context: Context=None, width: int=None, height:int=None, quality: int=None, dpi: int=None, fit: bool=True, deps: list[str]=[]) -> str: name = os.path.basename(input_filename) base, ext = os.path.splitext(name) ext = ext[1:] @@ -237,6 +239,8 @@ class ImageProcessor: if subprocess.run(['convert', *density_arg, full_path, *resize_arg, *quality_arg, target_path]).returncode != 0: raise ImageMagickError(f"Could not convert '{full_path}' to '{format}'") + if context is not None: + context.add_deps(deps_full) return target_name def is_outdated(self, target: str, deps: list[str]): diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py index f2e0853..8e854a3 100644 --- a/src/formatitko/output_generator.py +++ b/src/formatitko/output_generator.py @@ -498,6 +498,7 @@ class OutputGenerator: self.generate_simple_tag(e) if "footer_content" in e.metadata: self.generate(e.metadata["footer_content"]) + def generate_BlockGroup(self, e: BlockGroup): self.generate_simple_tag(e) diff --git a/src/formatitko/tex_generator.py b/src/formatitko/tex_generator.py index 143e4a9..fe8708d 100644 --- a/src/formatitko/tex_generator.py +++ b/src/formatitko/tex_generator.py @@ -110,6 +110,7 @@ class UCWTexGenerator(OutputGenerator): url = e.url additional_args = self.get_image_processor_args(e.attributes) + additional_args["context"] = self.context # The directory of the current file relative to the current working directory source_dir = self.context.dir diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index 28231cd..9b7bc67 100644 --- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -153,7 +153,9 @@ class TransformProcessor(NOPProcessor): pwd = os.path.abspath(".") if os.path.commonpath([full_path, pwd]) != 
os.path.commonpath([pwd]): return nullify(e) - text = open(self.context.dir + "/" + e.attributes["partial"], "r").read() + filename = self.context.dir + "/" + e.attributes["partial"] + self.context.add_dep(filename) + text = open(filename, "r").read() path = self.context.dir + "/" + e.attributes["partial"] if e.attributes["type"] == "md": includedDoc = import_md(text) @@ -217,7 +219,9 @@ class TransformProcessor(NOPProcessor): if not "type" in e.attributes: e.attributes["type"] = "module" if e.attributes["type"] == "md": - importedDoc = import_md(open(self.context.dir + "/" + e.content[0].text[1:], "r").read()) + filename = self.context.dir + "/" + e.content[0].text[1:] + self.context.add_dep(filename) + importedDoc = import_md(open(filename, "r").read()) self.transform(importedDoc.content) elif e.attributes["type"] == "module": matches = re.match(r"^(\w+)(?: as (\w+))?$", e.content[0].text[1:]) @@ -227,7 +231,9 @@ class TransformProcessor(NOPProcessor): module_name = matches.group(1) if matches.group(2) is None else matches.group(2) self.context.add_commands_from_module(module, module_name) elif e.attributes["type"] == "metadata": - data = json.load(open(self.context.dir + "/" + e.content[0].text[1:], "r")) + filename = self.context.dir + "/" + e.content[0].text[1:] + self.context.add_dep(filename) + data = json.load(open(filename, "r")) key = "" if not "key" in e.attributes else e.attributes["key"] self.context.import_metadata(data, key) else: From f0d939a65b61ac57d8ede780e142331ae3e674ac Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Fri, 23 Feb 2024 23:37:22 +0100 Subject: [PATCH 21/22] Separate context init for transform processor into separate function, add option to not make images clickable. 
--- src/formatitko/html_generator.py | 4 ++++ src/formatitko/transform_processor.py | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py index 280c60e..2aabe85 100644 --- a/src/formatitko/html_generator.py +++ b/src/formatitko/html_generator.py @@ -208,6 +208,10 @@ class HTMLGenerator(OutputGenerator): else: attributes["src"] = url + if e.attributes["no-img-link"]: + self.write(self.single_tag("img", attributes)) + return + img = RawInline(self.single_tag("img", attributes)) link = Link(img, url=url) diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index 9b7bc67..6ce7999 100644 --- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -40,6 +40,15 @@ class TransformProcessor(NOPProcessor): def add_command_module(self, module: Union[dict[str, CommandCallable], ModuleType], module_name: str=""): self._command_modules.append((module, module_name)) + def init_context(self, e: Doc) -> Context: + if self.context is not None: + raise DoubleDocError() + self.context = Context(e, self.root_file_path) + for module, module_name in self._command_modules: + self.context.add_commands_from_module(module, module_name) + e.content = [BlockGroup(*e.content, context=self.context)] + return self.context + def get_pretransformers(self) -> list[Callable[[ELCl],ELCl]]: return super().get_pretransformers()+[self.handle_if_attribute, self.handle_ifnot_attribute] @@ -71,12 +80,7 @@ class TransformProcessor(NOPProcessor): raise err def transform_Doc(self, e: Doc) -> Doc: - if self.context is not None: - raise DoubleDocError() - self.context = Context(e, self.root_file_path) - for module, module_name in self._command_modules: - self.context.add_commands_from_module(module, module_name) - e.content = [BlockGroup(*e.content, context=self.context)] + self.init_context(e) e.content = self.transform(e.content) return e @@ 
-95,8 +99,10 @@ class TransformProcessor(NOPProcessor): e.content = self.transform(e.content) # OG now has Context so this is not needed per se, but I'm keeping this here for the handling of attribute > context > default value # Pass down "no-srcset" metadatum as attribute down to images. - if not "no-srcset" in e.attributes: + if "no-srcset" not in e.attributes: e.attributes["no-srcset"] = self.context.get_metadata("no-srcset") if self.context.get_metadata("no-srcset") is not None else False + if "no-img-link" not in e.attributes: + e.attributes["no-img-link"] = self.context.get_metadata("no-img-link") if self.context.get_metadata("no-img-link") is not None else False return e def create_Group(self, *content, new_context: Context, replaced:Element, inline: bool=False) -> Group: From 7f52abde14f7fcefce39adb493035f217c03138a Mon Sep 17 00:00:00 2001 From: Greenscreener Date: Sat, 24 Feb 2024 02:11:57 +0100 Subject: [PATCH 22/22] katex-server bump: Removed null coalescing operator, because I don't want to update node on every computer in existence. --- src/formatitko/katex-server | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/formatitko/katex-server b/src/formatitko/katex-server index 909d075..953b44e 160000 --- a/src/formatitko/katex-server +++ b/src/formatitko/katex-server @@ -1 +1 @@ -Subproject commit 909d075b9d37bdf19f5e9d382d6a441125fe7aa4 +Subproject commit 953b44e942282375ac369af233c123b28146713e