diff --git a/src/formatitko/command_util.py b/src/formatitko/command_util.py index d68815e..8b01ca2 100644 --- a/src/formatitko/command_util.py +++ b/src/formatitko/command_util.py @@ -1,5 +1,5 @@ from .context import Context, CommandCallable # This is there because of a wild circular import dependency between many functions and classes -from panflute import CodeBlock +from panflute import CodeBlock, Null from . import command_env @@ -22,16 +22,13 @@ def parse_command(code: str) -> CommandCallable: # This function is called in trasform.py, defining a command which can be # called later -def handle_command_define(e: CodeBlock, c: Context): +def handle_command_define(e: CodeBlock, c: Context) -> Null: command = parse_command(e.text) if "define" in e.attributes: if not c.get_command(e.attributes["define"]): c.set_command(e.attributes["define"], command) - return nullify(e) else: raise NameError(f"Command already defined: '{e.attributes['define']}'") if "redefine" in e.attributes: c.set_command(e.attributes["redefine"], command) - return nullify(e) - return e - + return nullify(e) diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py index ba7a95e..503f7ff 100755 --- a/src/formatitko/formatitko.py +++ b/src/formatitko/formatitko.py @@ -13,6 +13,7 @@ from .tex import tex from .images import ImageProcessor from .output_generator import OutputGenerator from .html_generator import HTMLGenerator +from .transform_processor import TransformProcessor from .mj_show import show @@ -32,22 +33,25 @@ def main(): # just connect to an existing one. For formátíking many files in a row. # Use panflute to parse the input MD file - doc = import_md(open(args.input_filename, "r").read()) + doc1 = import_md(open(args.input_filename, "r").read()) + doc2 = import_md(open(args.input_filename, "r").read()) if args.debug: - print(show(doc)) + print(show(doc1)) # The language metadatum is important, so it's read before transformation and # then attached to a group inside the Doc - language = doc.get_metadata("language", None, True) - context = Context(doc, args.input_filename) + language = doc1.get_metadata("language", None, True) + context = Context(doc1, args.input_filename) # Transform the document. This includes all the fancy formatting this software does. - doc = doc.walk(transform, context) + doc1 = doc1.walk(transform, context) # Now wrap the document contents in a group, which is able to pop its language # setting out to TeX - doc.content = [Group(*doc.content, context=context, metadata={"language":language})] + doc1.content = [Group(*doc1.content, context=context, metadata={"language":language})] + + doc2 = TransformProcessor(args.input_filename).transform(doc2) # Initialize the image processor (this just keeps some basic state) imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs) @@ -57,8 +61,9 @@ def main(): # Generate HTML and TeX out of the transformed document #open(args.output_html, "w").write(html(doc, katexClient, imageProcessor)) #open(args.output_tex, "w").write(tex(doc, imageProcessor)) - HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc) - # OutputGenerator(sys.stdout).generate(doc) + # HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc) + OutputGenerator(open("/tmp/doc1", "w")).generate(doc1) + OutputGenerator(open("/tmp/doc2", "w")).generate(doc2) if args.debug: print(show(doc)) diff --git a/src/formatitko/transform.py b/src/formatitko/transform.py index 9dc824d..83601de 100644 --- a/src/formatitko/transform.py +++ b/src/formatitko/transform.py @@ -99,6 +99,7 @@ def transform(e: Element, c: Context) -> Element: if not "style" in e.attributes: e.attributes["style"] = c.get_metadata("highlight-style") if c.get_metadata("highlight-style") is not None else "default" e.attributes["noclasses"] = False + # I think this is supposed to enable inline styles for highlighting when the style differs from the document, but it clearly doesn't work. a) HTML_generator never accesses it and b) Only the top-level document contains a style so you have to ask the top level context, not the current context. else: e.attributes["noclasses"] = True @@ -131,7 +132,7 @@ def transform(e: Element, c: Context) -> Element: # Shorter (and sometimes the only) forms of certain features if isinstance(e, Span) and len(e.content) == 1 and isinstance(e.content[0], Str): ## Handle special command shorthand [!commandname]{} - if re.match(r"^![\w]+$", e.content[0].text): + if re.match(r"^![\w.]+$", e.content[0].text): e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]}) ## Handle import [#path/file.md]{} diff --git a/src/formatitko/transform_processor.py b/src/formatitko/transform_processor.py index 6fc55e6..c5d4311 100644 --- a/src/formatitko/transform_processor.py +++ b/src/formatitko/transform_processor.py @@ -2,11 +2,19 @@ from panflute import Element, ListContainer, Inline, Block from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quoted, RawInline, SmallCaps, SoftBreak, Space, Span, Str, Strikeout, Strong, Subscript, Superscript, Underline from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead from panflute import TableRow, TableCell, Caption, Doc -from typing import Union +from typing import Union, Callable + +import os +import re from .whitespace import NBSP from .elements import FQuoted from .context import Group +from .util import nullify, import_md +from .context import Context +from .whitespace import Whitespace, bavlna +from .command import BlockCommand, InlineCommand, Command +from .command_util import handle_command_define, parse_command ELCl = Union[Element, ListContainer, list[Union[Element, ListContainer]]] @@ -14,10 +22,15 @@ class UnknownElementError(Exception): "An unknown Element has been passed to the TransformProcessor, probably because panflute introduced a new one." pass +class DoubleDocError(Exception): + "TransformProcessor should only ever see a single Doc." + pass + class TransformProcessor: - def __init__(self): - self.context = None + def __init__(self, root_file_path: str): + self.context: Context = None + self.root_file_path = root_file_path self.TYPE_DICT = { TableRow: self.transform_TableRow, @@ -77,7 +90,10 @@ class TransformProcessor: Superscript: self.transform_Superscript, Underline: self.transform_Underline, NBSP: self.transform_NBSP, - FQuoted: self.transform_FQuoted + FQuoted: self.transform_FQuoted, + + InlineCommand: self.transform_InlineCommand, + BlockCommand: self.transform_BlockCommand } def get_pretransformers(self) -> list[Callable[[ELCl],ELCl]]: @@ -111,215 +127,236 @@ class TransformProcessor: if hasattr(e, "attributes") and "if" in e.attributes: if not self.context.is_flag_set(e.attributes["if"]): return nullify(e) + return e def handle_ifnot_attribute(self, e: ELCl) -> ELCl: # `ifnot` attribute. Only show this element if flag is NOT set if hasattr(e, "attributes") and "ifnot" in e.attributes: if self.context.is_flag_set(e.attributes["ifnot"]): return nullify(e) + return e - def transform_list(e: list[ELCl]) -> list[ELCl]: + def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]: for i in range(len(e)): e[i] = self.transform(e[i]) return e - def transform_ListContainer(e: ListContainer) -> ListContainer: + def transform_ListContainer(self, e: ListContainer) -> ListContainer: for i in range(len(e)): e[i] = self.transform(e[i]) return e - def transform_TableRow(e: TableRow) -> TableRow: + def transform_TableRow(self, e: TableRow) -> TableRow: e.content = self.transform(e.content) return e - def transform_TableCell(e: TableCell) -> TableCell: + def transform_TableCell(self, e: TableCell) -> TableCell: e.content = self.transform(e.content) return e - def transform_Caption(e: Caption) -> Caption: + def transform_Caption(self, e: Caption) -> Caption: e.content = self.transform(e.content) return e - def transform_Doc(e: Doc) -> Doc: + def transform_LineItem(self, e: LineItem) -> LineItem: e.content = self.transform(e.content) return e - def transform_LineItem(e: LineItem) -> LineItem: + def transform_ListItem(self, e: ListItem) -> ListItem: e.content = self.transform(e.content) return e - def transform_ListItem(e: ListItem) -> ListItem: + def transform_BlockQuote(self, e: BlockQuote) -> BlockQuote: e.content = self.transform(e.content) return e - def transform_BlockQuote(e: BlockQuote) -> BlockQuote: + def transform_BulletList(self, e: BulletList) -> BulletList: e.content = self.transform(e.content) return e - def transform_BulletList(e: BulletList) -> BulletList: + def transform_Citation(self, e: Citation) -> Citation: e.content = self.transform(e.content) return e - def transform_Citation(e: Citation) -> Citation: + def transform_Definition(self, e: Definition) -> Definition: e.content = self.transform(e.content) return e - def transform_Definition(e: Definition) -> Definition: + def transform_DefinitionItem(self, e: DefinitionItem) -> DefinitionItem: e.content = self.transform(e.content) return e - def transform_DefinitionItem(e: DefinitionItem) -> DefinitionItem: + def transform_DefinitionList(self, e: DefinitionList) -> DefinitionList: e.content = self.transform(e.content) return e - def transform_DefinitionList(e: DefinitionList) -> DefinitionList: + def transform_Header(self, e: Header) -> Header: e.content = self.transform(e.content) return e - def transform_Header(e: Header) -> Header: + def transform_LineBlock(self, e: LineBlock) -> LineBlock: e.content = self.transform(e.content) return e - def transform_LineBlock(e: LineBlock) -> LineBlock: + def transform_MetaBlocks(self, e: MetaBlocks) -> MetaBlocks: e.content = self.transform(e.content) return e - def transform_MetaBlocks(e: MetaBlocks) -> MetaBlocks: + def transform_MetaBool(self, e: MetaBool) -> MetaBool: e.content = self.transform(e.content) return e - def transform_MetaBool(e: MetaBool) -> MetaBool: + def transform_MetaInlines(self, e: MetaInlines) -> MetaInlines: e.content = self.transform(e.content) return e - def transform_MetaInlines(e: MetaInlines) -> MetaInlines: + def transform_MetaList(self, e: MetaList) -> MetaList: e.content = self.transform(e.content) return e - def transform_MetaList(e: MetaList) -> MetaList: + def transform_MetaMap(self, e: MetaMap) -> MetaMap: e.content = self.transform(e.content) return e - def transform_MetaMap(e: MetaMap) -> MetaMap: + def transform_MetaString(self, e: MetaString) -> MetaString: e.content = self.transform(e.content) return e - def transform_MetaString(e: MetaString) -> MetaString: + def transform_OrderedList(self, e: OrderedList) -> OrderedList: e.content = self.transform(e.content) return e - def transform_OrderedList(e: OrderedList) -> OrderedList: + def transform_Para(self, e: Para) -> Para: e.content = self.transform(e.content) return e - def transform_Para(e: Para) -> Para: + def transform_Plain(self, e: Plain) -> Plain: e.content = self.transform(e.content) return e - def transform_Plain(e: Plain) -> Plain: + def transform_TableBody(self, e: TableBody) -> TableBody: e.content = self.transform(e.content) return e - def transform_TableBody(e: TableBody) -> TableBody: + def transform_TableFoot(self, e: TableFoot) -> TableFoot: e.content = self.transform(e.content) return e - def transform_TableFoot(e: TableFoot) -> TableFoot: + def transform_TableHead(self, e: TableHead) -> TableHead: e.content = self.transform(e.content) return e - def transform_TableHead(e: TableHead) -> TableHead: + def transform_Group(self, e: Group) -> Group: e.content = self.transform(e.content) return e - def transform_Group(e: Group) -> Group: + def transform_Cite(self, e: Cite) -> Cite: e.content = self.transform(e.content) return e - def transform_Cite(e: Cite) -> Cite: + def transform_Emph(self, e: Emph) -> Emph: e.content = self.transform(e.content) return e - def transform_Emph(e: Emph) -> Emph: + def transform_Link(self, e: Link) -> Link: e.content = self.transform(e.content) return e - def transform_Image(e: Image) -> Image: + def transform_Note(self, e: Note) -> Note: e.content = self.transform(e.content) return e - def transform_Link(e: Link) -> Link: + def transform_SmallCaps(self, e: SmallCaps) -> SmallCaps: e.content = self.transform(e.content) return e - def transform_Note(e: Note) -> Note: + def transform_Strikeout(self, e: Strikeout) -> Strikeout: e.content = self.transform(e.content) return e - def transform_Quoted(e: Quoted) -> Quoted: + def transform_Strong(self, e: Strong) -> Strong: e.content = self.transform(e.content) return e - def transform_SmallCaps(e: SmallCaps) -> SmallCaps: + def transform_Subscript(self, e: Subscript) -> Subscript: e.content = self.transform(e.content) return e - def transform_Strikeout(e: Strikeout) -> Strikeout: + def transform_Superscript(self, e: Superscript) -> Superscript: e.content = self.transform(e.content) return e - def transform_Strong(e: Strong) -> Strong: + def transform_Underline(self, e: Underline) -> Underline: e.content = self.transform(e.content) return e - def transform_Subscript(e: Subscript) -> Subscript: + def transform_FQuoted(self, e: FQuoted) -> FQuoted: e.content = self.transform(e.content) return e - def transform_Superscript(e: Superscript) -> Superscript: + def transform_Figure(self, e: Figure) -> Figure: e.content = self.transform(e.content) + e.caption = self.transform(e.caption) return e - def transform_Underline(e: Underline) -> Underline: + def transform_Table(self, e: Table) -> Table: + e.head = self.transform(e.head) e.content = self.transform(e.content) + e.foot = self.transform(e.foot) return e - def transform_FQuoted(e: FQuoted) -> FQuoted: + + def transform_Doc(self, e: Doc) -> Doc: + if self.context is not None: + raise DoubleDocError() + self.context = Context(e, self.root_file_path) e.content = self.transform(e.content) + e.content = [Group(*e.content, context=self.context)] return e - def transform_Figure(e: Figure) -> Figure: + def transform_Quoted(self, e: Quoted) -> FQuoted: e.content = self.transform(e.content) - e.caption = self.transform(e.caption) - return e + quote_styles = { + "cs": "cs", + "en": "en", + "sk": "cs", + None: None + } + print(self.context.get_metadata("language")) + return FQuoted(*e.content, quote_type=e.quote_type, style=quote_styles[self.context.get_metadata("language")]) - def transform_Table(e: Table) -> Table: - e.head = self.transform(e.head) + def transform_Image(self, e: Image) -> Image: e.content = self.transform(e.content) - e.foot = self.transform(e.foot) + # FIXME? Passing down attributes explicitly no longer needed, because OG now has access to Context. + # Pass down the directory of the current source file for finding image + # files. + e.attributes["source_dir"] = self.context.dir + # FIXME? Passing down attributes explicitly no longer needed, because OG now has access to Context. + # Pass down "no-srcset" metadatum as attribute down to images. + if not "no-srcset" in e.attributes: + e.attributes["no-srcset"] = self.context.get_metadata("no-srcset") if self.context.get_metadata("no-srcset") is not None else False return e - - def create_Group(*content, new_context: Context) -> Group: + def create_Group(self, *content, new_context: Context) -> Group: old_context = self.context self.context = new_context - content = self.transform(content) + content = self.transform([*content]) self.context = old_context return Group(*content, context=new_context) - def transform_Div(e: Div) -> Div: + def transform_Div(self, e: Div) -> Union[Div, Group, Null]: e.content = self.transform(e.content) if "group" in e.classes: # `.group` class for Divs # Content of Div is enclosed in a separate context, all attributes are passed as metadata new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted) - for attribute, value in e.attributes: - new_context.set_metadata(attribute, value) - return self.create_Group(*e.content, new_context) + for attribute, value in e.attributes.items(): + new_context.set_metadata(attribute, value) # FIXME: This raises a warning when done with `language`. Since context is available to OG, we should trash the warning and rework the OG to use the Context. + return self.create_Group(*e.content, new_context=new_context) if "c" in e.attributes: # Commands can be called multiple ways, this handles the following syntax: @@ -344,21 +381,21 @@ class TransformProcessor: trusted = False if not self.context.trusted: trusted = False - return self.create_Group(*e.content, Context(includedDoc, path, self.context, trusted=trusted)) + return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, path, self.context, trusted=trusted)) return e - def transform_Span(e: Span) -> Span: + def transform_Span(self, e: Span) -> Span: e.content = self.transform(e.content) - if "group" in e.classes: - # `.group` class for Spans - # Content of Span is enclosed in a separate context, all attributes are passed as metadata - # FIXME: This probably doesn't work... Will we error? - new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted) - for attribute, value in e.attributes: - new_context.set_metadata(attribute, value) - return self.create_Group(*e.content, new_context) + # TODO: This sadly doesn't work. We would need to create a separate class InlineGroup, that would be Inline. + #if "group" in e.classes: + # # `.group` class for Spans + # # Content of Span is enclosed in a separate context, all attributes are passed as metadata + # new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted) + # for attribute, value in e.attributes.items(): + # new_context.set_metadata(attribute, value) + # return self.create_Group(*e.content, new_context=new_context) if "c" in e.attributes: # Commands can be called multiple ways, this handles the following syntax: @@ -366,54 +403,118 @@ class TransformProcessor: e = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes) return self.transform(e) + if len(e.content) == 1 and isinstance(e.content[0], Str): + ## Handle special command shorthand [!commandname]{} + if re.match(r"^![\w.]+$", e.content[0].text): + e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]}) + return self.transform(e) + + ## Handle import [#path/file.md]{} + # This is the exact opposite of partials. We take the commands, flags + # and metadata but drop the content. + elif re.match(r"^#.+$", e.content[0].text): + importedDoc = import_md(open(self.context.dir + "/" + e.content[0].text[1:], "r").read()) + self.transform(importedDoc.content) + return nullify(e) + + ## Handle metadata print [$key1.key2]{} + # This is a shorthand for just printing the content of some metadata. + elif re.match(r"^\$[\w.]+$", e.content[0].text): + val = self.context.get_metadata(e.content[0].text[1:], False) + if isinstance(val, MetaInlines): + e = Span(*val.content) + e = self.transform(e) + elif isinstance(val, MetaString): + e = Span(Str(val.string)) + elif isinstance(val, MetaBool): + e = Span(Str(str(val.boolean))) + else: + raise TypeError(f"Cannot print value of metadatum '{e.content[0].text[1:]}' of type '{type(val)}'") + return e + return e - def transform_CodeBlock(e: CodeBlock) -> CodeBlock: + def transform_CodeBlock(self, e: CodeBlock) -> Union[CodeBlock, Div, Null]: if "markdown" in e.classes and "group" in e.classes: includedDoc = import_md(e.text) - return self.create_Group(*includedDoc.content, context=Context(includedDoc, self.context.path, self.context, self.context.trusted)) + return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, self.context.path, self.context, self.context.trusted)) + + if "python" in e.classes and "run" in e.classes: + if not self.context.trusted: + return nullify(e) + command_output = parse_command(e.text)(BlockCommand(), self.context) + e = BlockCommand().replaceSelf(*([] if command_output is None else command_output)) + return self.transform(e) + + if "python" in e.classes and ("define" in e.attributes or "redefine" in e.attributes): + if not self.context.trusted: + return nullify(e) + return handle_command_define(e, self.context) + + # Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks + # FIXME? Passing down attributes explicitly no longer needed, because OG now has access to Context. + if not "highlight" in e.attributes: + e.attributes["highlight"] = self.context.get_metadata("highlight") if self.context.get_metadata("highlight") is not None else True + if not "style" in e.attributes: + e.attributes["style"] = self.context.get_metadata("highlight-style") if self.context.get_metadata("highlight-style") is not None else "default" + e.attributes["noclasses"] = False + # I think this is supposed to enable inline styles for highlighting when the style differs from the document, but it clearly doesn't work. a) HTML_generator never accesses it and b) Only the top-level document contains a style so you have to ask the top level context, not the current context. + else: + e.attributes["noclasses"] = True return e + def transform_Command(self, e: Command) -> Union[Div, Span]: + if not self.context.get_command(e.attributes["c"]): + raise NameError(f"Command not defined '{e.attributes['c']}'.") + command_output = self.context.get_command(e.attributes["c"])(e, self.context) + e = e.replaceSelf(*command_output) + return self.transform(e) + + def transform_InlineCommand(self, e: InlineCommand) -> Span: + return self.transform_Command(e) + + def transform_BlockCommand(self, e: BlockCommand) -> Div: + return self.transform_Command(e) - def transform_Whitespace(e: Whitespace) -> Whitespace: + def transform_Whitespace(self, e: Whitespace) -> Whitespace: if bavlna(e, self.context): return NBSP() else: return e - def transform_SoftBreak(e: SoftBreak) -> SoftBreak: + def transform_SoftBreak(self, e: SoftBreak) -> Whitespace: return self.transform_Whitespace(e) - def transform_Space(e: Space) -> Space: + def transform_Space(self, e: Space) -> Whitespace: return self.transform_Whitespace(e) - def transform_NBSP(e: NBSP) -> NBSP: + def transform_NBSP(self, e: NBSP) -> NBSP: return e - def transform_Str(e: Str) -> Str: + def transform_Str(self, e: Str) -> Str: return e - def transform_RawInline(e: RawInline) -> RawInline: + def transform_RawInline(self, e: RawInline) -> RawInline: return e - def transform_Math(e: Math) -> Math: + def transform_Math(self, e: Math) -> Math: return e - def transform_LineBreak(e: LineBreak) -> LineBreak: + def transform_LineBreak(self, e: LineBreak) -> LineBreak: return e - def transform_Code(e: Code) -> Code: + def transform_Code(self, e: Code) -> Code: return e - def transform_RawBlock(e: RawBlock) -> RawBlock: + def transform_RawBlock(self, e: RawBlock) -> RawBlock: return e - def transform_Null(e: Null) -> Null: + def transform_Null(self, e: Null) -> Null: return e - def transform_HorizontalRule(e: HorizontalRule) -> HorizontalRule: + def transform_HorizontalRule(self, e: HorizontalRule) -> HorizontalRule: return e diff --git a/test/test.md b/test/test.md index 161d6f1..90eb111 100644 --- a/test/test.md +++ b/test/test.md @@ -38,7 +38,7 @@ context.set_flag("cat", True) context.set_metadata("a", {}) context.set_metadata("a.b", {}) context.set_metadata("a.b.c", "Bruh **bruh** bruh") -return [*parse_string("The main document's title is "), fe.FQuoted(*parse_string(context.get_metadata('title')), style="simple"), pf.Str(".")] +return [*parse_string("The main document's title is "), pf.Quoted(*parse_string(context.get_metadata('title')), quote_type="SingleQuote"), pf.Str(".")] ``` ```python {style=native} @@ -115,6 +115,17 @@ $$ --- +In this text, there might be some phrases [v češtině]{.group language=cs} and +maybe even + +:::{.group language=cs} +celé pasáže textu v češtině. + +Růžový bagr bez zeleného bagru se žlutým bagrem. + +Yay můžeme mít izolované kontexty bez opakovaného volání pandocu jupí! +::: + This should be seen by all.^[This is a footnote] | Matematicko-fyzikální fakulta University Karlovy