Browse Source

Vytvořena třída TransformProcessor, nahrazuje transform.py. Nepoužívá panflutí walk, ale řeší si rekurzi sama. Aktuálně by měla umět vše, co původní transform. #21

pull/28/head
Jan Černohorský 1 year ago
parent
commit
1a629b2bf9
  1. 9
      src/formatitko/command_util.py
  2. 21
      src/formatitko/formatitko.py
  3. 3
      src/formatitko/transform.py
  4. 267
      src/formatitko/transform_processor.py
  5. 13
      test/test.md

9
src/formatitko/command_util.py

@ -1,5 +1,5 @@
from .context import Context, CommandCallable # This is there because of a wild circular import dependency between many functions and classes
from panflute import CodeBlock
from panflute import CodeBlock, Null
from . import command_env
@ -22,16 +22,13 @@ def parse_command(code: str) -> CommandCallable:
# This function is called in transform.py, defining a command which can be
# called later
def handle_command_define(e: CodeBlock, c: Context):
def handle_command_define(e: CodeBlock, c: Context) -> Null:
command = parse_command(e.text)
if "define" in e.attributes:
if not c.get_command(e.attributes["define"]):
c.set_command(e.attributes["define"], command)
return nullify(e)
else:
raise NameError(f"Command already defined: '{e.attributes['define']}'")
if "redefine" in e.attributes:
c.set_command(e.attributes["redefine"], command)
return nullify(e)
return e
return nullify(e)

21
src/formatitko/formatitko.py

@ -13,6 +13,7 @@ from .tex import tex
from .images import ImageProcessor
from .output_generator import OutputGenerator
from .html_generator import HTMLGenerator
from .transform_processor import TransformProcessor
from .mj_show import show
@ -32,22 +33,25 @@ def main():
# just connect to an existing one. For formátíking many files in a row.
# Use panflute to parse the input MD file
doc = import_md(open(args.input_filename, "r").read())
doc1 = import_md(open(args.input_filename, "r").read())
doc2 = import_md(open(args.input_filename, "r").read())
if args.debug:
print(show(doc))
print(show(doc1))
# The language metadatum is important, so it's read before transformation and
# then attached to a group inside the Doc
language = doc.get_metadata("language", None, True)
context = Context(doc, args.input_filename)
language = doc1.get_metadata("language", None, True)
context = Context(doc1, args.input_filename)
# Transform the document. This includes all the fancy formatting this software does.
doc = doc.walk(transform, context)
doc1 = doc1.walk(transform, context)
# Now wrap the document contents in a group, which is able to pop its language
# setting out to TeX
doc.content = [Group(*doc.content, context=context, metadata={"language":language})]
doc1.content = [Group(*doc1.content, context=context, metadata={"language":language})]
doc2 = TransformProcessor(args.input_filename).transform(doc2)
# Initialize the image processor (this just keeps some basic state)
imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, args.img_cache_dir, *args.img_lookup_dirs)
@ -57,8 +61,9 @@ def main():
# Generate HTML and TeX out of the transformed document
#open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
#open(args.output_tex, "w").write(tex(doc, imageProcessor))
HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc)
# OutputGenerator(sys.stdout).generate(doc)
# HTMLGenerator(sys.stdout, katexClient, imageProcessor).generate(doc)
OutputGenerator(open("/tmp/doc1", "w")).generate(doc1)
OutputGenerator(open("/tmp/doc2", "w")).generate(doc2)
if args.debug:
print(show(doc))

3
src/formatitko/transform.py

@ -99,6 +99,7 @@ def transform(e: Element, c: Context) -> Element:
if not "style" in e.attributes:
e.attributes["style"] = c.get_metadata("highlight-style") if c.get_metadata("highlight-style") is not None else "default"
e.attributes["noclasses"] = False
# I think this is supposed to enable inline styles for highlighting when the style differs from the document, but it clearly doesn't work. a) HTML_generator never accesses it and b) Only the top-level document contains a style so you have to ask the top level context, not the current context.
else:
e.attributes["noclasses"] = True
@ -131,7 +132,7 @@ def transform(e: Element, c: Context) -> Element:
# Shorter (and sometimes the only) forms of certain features
if isinstance(e, Span) and len(e.content) == 1 and isinstance(e.content[0], Str):
## Handle special command shorthand [!commandname]{}
if re.match(r"^![\w]+$", e.content[0].text):
if re.match(r"^![\w.]+$", e.content[0].text):
e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]})
## Handle import [#path/file.md]{}

267
src/formatitko/transform_processor.py

@ -2,11 +2,19 @@ from panflute import Element, ListContainer, Inline, Block
from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quoted, RawInline, SmallCaps, SoftBreak, Space, Span, Str, Strikeout, Strong, Subscript, Superscript, Underline
from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead
from panflute import TableRow, TableCell, Caption, Doc
from typing import Union
from typing import Union, Callable
import os
import re
from .whitespace import NBSP
from .elements import FQuoted
from .context import Group
from .util import nullify, import_md
from .context import Context
from .whitespace import Whitespace, bavlna
from .command import BlockCommand, InlineCommand, Command
from .command_util import handle_command_define, parse_command
ELCl = Union[Element, ListContainer, list[Union[Element, ListContainer]]]
@ -14,10 +22,15 @@ class UnknownElementError(Exception):
"An unknown Element has been passed to the TransformProcessor, probably because panflute introduced a new one."
pass
class DoubleDocError(Exception):
"TransformProcessor should only ever see a single Doc."
pass
class TransformProcessor:
def __init__(self):
self.context = None
def __init__(self, root_file_path: str):
self.context: Context = None
self.root_file_path = root_file_path
self.TYPE_DICT = {
TableRow: self.transform_TableRow,
@ -77,7 +90,10 @@ class TransformProcessor:
Superscript: self.transform_Superscript,
Underline: self.transform_Underline,
NBSP: self.transform_NBSP,
FQuoted: self.transform_FQuoted
FQuoted: self.transform_FQuoted,
InlineCommand: self.transform_InlineCommand,
BlockCommand: self.transform_BlockCommand
}
def get_pretransformers(self) -> list[Callable[[ELCl],ELCl]]:
@ -111,215 +127,236 @@ class TransformProcessor:
if hasattr(e, "attributes") and "if" in e.attributes:
if not self.context.is_flag_set(e.attributes["if"]):
return nullify(e)
return e
def handle_ifnot_attribute(self, e: ELCl) -> ELCl:
# `ifnot` attribute. Only show this element if flag is NOT set
if hasattr(e, "attributes") and "ifnot" in e.attributes:
if self.context.is_flag_set(e.attributes["ifnot"]):
return nullify(e)
return e
def transform_list(e: list[ELCl]) -> list[ELCl]:
def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]:
for i in range(len(e)):
e[i] = self.transform(e[i])
return e
def transform_ListContainer(e: ListContainer) -> ListContainer:
def transform_ListContainer(self, e: ListContainer) -> ListContainer:
for i in range(len(e)):
e[i] = self.transform(e[i])
return e
def transform_TableRow(e: TableRow) -> TableRow:
def transform_TableRow(self, e: TableRow) -> TableRow:
e.content = self.transform(e.content)
return e
def transform_TableCell(e: TableCell) -> TableCell:
def transform_TableCell(self, e: TableCell) -> TableCell:
e.content = self.transform(e.content)
return e
def transform_Caption(e: Caption) -> Caption:
def transform_Caption(self, e: Caption) -> Caption:
e.content = self.transform(e.content)
return e
def transform_Doc(e: Doc) -> Doc:
def transform_LineItem(self, e: LineItem) -> LineItem:
e.content = self.transform(e.content)
return e
def transform_LineItem(e: LineItem) -> LineItem:
def transform_ListItem(self, e: ListItem) -> ListItem:
e.content = self.transform(e.content)
return e
def transform_ListItem(e: ListItem) -> ListItem:
def transform_BlockQuote(self, e: BlockQuote) -> BlockQuote:
e.content = self.transform(e.content)
return e
def transform_BlockQuote(e: BlockQuote) -> BlockQuote:
def transform_BulletList(self, e: BulletList) -> BulletList:
e.content = self.transform(e.content)
return e
def transform_BulletList(e: BulletList) -> BulletList:
def transform_Citation(self, e: Citation) -> Citation:
e.content = self.transform(e.content)
return e
def transform_Citation(e: Citation) -> Citation:
def transform_Definition(self, e: Definition) -> Definition:
e.content = self.transform(e.content)
return e
def transform_Definition(e: Definition) -> Definition:
def transform_DefinitionItem(self, e: DefinitionItem) -> DefinitionItem:
e.content = self.transform(e.content)
return e
def transform_DefinitionItem(e: DefinitionItem) -> DefinitionItem:
def transform_DefinitionList(self, e: DefinitionList) -> DefinitionList:
e.content = self.transform(e.content)
return e
def transform_DefinitionList(e: DefinitionList) -> DefinitionList:
def transform_Header(self, e: Header) -> Header:
e.content = self.transform(e.content)
return e
def transform_Header(e: Header) -> Header:
def transform_LineBlock(self, e: LineBlock) -> LineBlock:
e.content = self.transform(e.content)
return e
def transform_LineBlock(e: LineBlock) -> LineBlock:
def transform_MetaBlocks(self, e: MetaBlocks) -> MetaBlocks:
e.content = self.transform(e.content)
return e
def transform_MetaBlocks(e: MetaBlocks) -> MetaBlocks:
def transform_MetaBool(self, e: MetaBool) -> MetaBool:
e.content = self.transform(e.content)
return e
def transform_MetaBool(e: MetaBool) -> MetaBool:
def transform_MetaInlines(self, e: MetaInlines) -> MetaInlines:
e.content = self.transform(e.content)
return e
def transform_MetaInlines(e: MetaInlines) -> MetaInlines:
def transform_MetaList(self, e: MetaList) -> MetaList:
e.content = self.transform(e.content)
return e
def transform_MetaList(e: MetaList) -> MetaList:
def transform_MetaMap(self, e: MetaMap) -> MetaMap:
e.content = self.transform(e.content)
return e
def transform_MetaMap(e: MetaMap) -> MetaMap:
def transform_MetaString(self, e: MetaString) -> MetaString:
e.content = self.transform(e.content)
return e
def transform_MetaString(e: MetaString) -> MetaString:
def transform_OrderedList(self, e: OrderedList) -> OrderedList:
e.content = self.transform(e.content)
return e
def transform_OrderedList(e: OrderedList) -> OrderedList:
def transform_Para(self, e: Para) -> Para:
e.content = self.transform(e.content)
return e
def transform_Para(e: Para) -> Para:
def transform_Plain(self, e: Plain) -> Plain:
e.content = self.transform(e.content)
return e
def transform_Plain(e: Plain) -> Plain:
def transform_TableBody(self, e: TableBody) -> TableBody:
e.content = self.transform(e.content)
return e
def transform_TableBody(e: TableBody) -> TableBody:
def transform_TableFoot(self, e: TableFoot) -> TableFoot:
e.content = self.transform(e.content)
return e
def transform_TableFoot(e: TableFoot) -> TableFoot:
def transform_TableHead(self, e: TableHead) -> TableHead:
e.content = self.transform(e.content)
return e
def transform_TableHead(e: TableHead) -> TableHead:
def transform_Group(self, e: Group) -> Group:
e.content = self.transform(e.content)
return e
def transform_Group(e: Group) -> Group:
def transform_Cite(self, e: Cite) -> Cite:
e.content = self.transform(e.content)
return e
def transform_Cite(e: Cite) -> Cite:
def transform_Emph(self, e: Emph) -> Emph:
e.content = self.transform(e.content)
return e
def transform_Emph(e: Emph) -> Emph:
def transform_Link(self, e: Link) -> Link:
e.content = self.transform(e.content)
return e
def transform_Image(e: Image) -> Image:
def transform_Note(self, e: Note) -> Note:
e.content = self.transform(e.content)
return e
def transform_Link(e: Link) -> Link:
def transform_SmallCaps(self, e: SmallCaps) -> SmallCaps:
e.content = self.transform(e.content)
return e
def transform_Note(e: Note) -> Note:
def transform_Strikeout(self, e: Strikeout) -> Strikeout:
e.content = self.transform(e.content)
return e
def transform_Quoted(e: Quoted) -> Quoted:
def transform_Strong(self, e: Strong) -> Strong:
e.content = self.transform(e.content)
return e
def transform_SmallCaps(e: SmallCaps) -> SmallCaps:
def transform_Subscript(self, e: Subscript) -> Subscript:
e.content = self.transform(e.content)
return e
def transform_Strikeout(e: Strikeout) -> Strikeout:
def transform_Superscript(self, e: Superscript) -> Superscript:
e.content = self.transform(e.content)
return e
def transform_Strong(e: Strong) -> Strong:
def transform_Underline(self, e: Underline) -> Underline:
e.content = self.transform(e.content)
return e
def transform_Subscript(e: Subscript) -> Subscript:
def transform_FQuoted(self, e: FQuoted) -> FQuoted:
e.content = self.transform(e.content)
return e
def transform_Superscript(e: Superscript) -> Superscript:
def transform_Figure(self, e: Figure) -> Figure:
e.content = self.transform(e.content)
e.caption = self.transform(e.caption)
return e
def transform_Underline(e: Underline) -> Underline:
def transform_Table(self, e: Table) -> Table:
e.head = self.transform(e.head)
e.content = self.transform(e.content)
e.foot = self.transform(e.foot)
return e
def transform_FQuoted(e: FQuoted) -> FQuoted:
def transform_Doc(self, e: Doc) -> Doc:
if self.context is not None:
raise DoubleDocError()
self.context = Context(e, self.root_file_path)
e.content = self.transform(e.content)
e.content = [Group(*e.content, context=self.context)]
return e
def transform_Figure(e: Figure) -> Figure:
def transform_Quoted(self, e: Quoted) -> FQuoted:
e.content = self.transform(e.content)
e.caption = self.transform(e.caption)
return e
quote_styles = {
"cs": "cs",
"en": "en",
"sk": "cs",
None: None
}
print(self.context.get_metadata("language"))
return FQuoted(*e.content, quote_type=e.quote_type, style=quote_styles[self.context.get_metadata("language")])
def transform_Table(e: Table) -> Table:
e.head = self.transform(e.head)
def transform_Image(self, e: Image) -> Image:
e.content = self.transform(e.content)
e.foot = self.transform(e.foot)
# FIXME? Passing down attributes explicitly no longer needed, because OG now has access to Context.
# Pass down the directory of the current source file for finding image
# files.
e.attributes["source_dir"] = self.context.dir
# FIXME? Passing down attributes explicitly no longer needed, because OG now has access to Context.
# Pass down "no-srcset" metadatum as attribute down to images.
if not "no-srcset" in e.attributes:
e.attributes["no-srcset"] = self.context.get_metadata("no-srcset") if self.context.get_metadata("no-srcset") is not None else False
return e
def create_Group(*content, new_context: Context) -> Group:
def create_Group(self, *content, new_context: Context) -> Group:
old_context = self.context
self.context = new_context
content = self.transform(content)
content = self.transform([*content])
self.context = old_context
return Group(*content, context=new_context)
def transform_Div(e: Div) -> Div:
def transform_Div(self, e: Div) -> Union[Div, Group, Null]:
e.content = self.transform(e.content)
if "group" in e.classes:
# `.group` class for Divs
# Content of Div is enclosed in a separate context, all attributes are passed as metadata
new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted)
for attribute, value in e.attributes:
new_context.set_metadata(attribute, value)
return self.create_Group(*e.content, new_context)
for attribute, value in e.attributes.items():
new_context.set_metadata(attribute, value) # FIXME: This raises a warning when done with `language`. Since context is available to OG, we should trash the warning and rework the OG to use the Context.
return self.create_Group(*e.content, new_context=new_context)
if "c" in e.attributes:
# Commands can be called multiple ways, this handles the following syntax:
@ -344,21 +381,21 @@ class TransformProcessor:
trusted = False
if not self.context.trusted:
trusted = False
return self.create_Group(*e.content, Context(includedDoc, path, self.context, trusted=trusted))
return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, path, self.context, trusted=trusted))
return e
def transform_Span(e: Span) -> Span:
def transform_Span(self, e: Span) -> Span:
e.content = self.transform(e.content)
if "group" in e.classes:
# `.group` class for Spans
# Content of Span is enclosed in a separate context, all attributes are passed as metadata
# FIXME: This probably doesn't work... Will we error?
new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted)
for attribute, value in e.attributes:
new_context.set_metadata(attribute, value)
return self.create_Group(*e.content, new_context)
# TODO: This sadly doesn't work. We would need to create a separate class InlineGroup, that would be Inline.
#if "group" in e.classes:
# # `.group` class for Spans
# # Content of Span is enclosed in a separate context, all attributes are passed as metadata
# new_context = Context(Doc(), self.context.path, self.context, trusted=self.context.trusted)
# for attribute, value in e.attributes.items():
# new_context.set_metadata(attribute, value)
# return self.create_Group(*e.content, new_context=new_context)
if "c" in e.attributes:
# Commands can be called multiple ways, this handles the following syntax:
@ -366,54 +403,118 @@ class TransformProcessor:
e = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes)
return self.transform(e)
if len(e.content) == 1 and isinstance(e.content[0], Str):
## Handle special command shorthand [!commandname]{}
if re.match(r"^![\w.]+$", e.content[0].text):
e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]})
return self.transform(e)
## Handle import [#path/file.md]{}
# This is the exact opposite of partials. We take the commands, flags
# and metadata but drop the content.
elif re.match(r"^#.+$", e.content[0].text):
importedDoc = import_md(open(self.context.dir + "/" + e.content[0].text[1:], "r").read())
self.transform(importedDoc.content)
return nullify(e)
## Handle metadata print [$key1.key2]{}
# This is a shorthand for just printing the content of some metadata.
elif re.match(r"^\$[\w.]+$", e.content[0].text):
val = self.context.get_metadata(e.content[0].text[1:], False)
if isinstance(val, MetaInlines):
e = Span(*val.content)
e = self.transform(e)
elif isinstance(val, MetaString):
e = Span(Str(val.string))
elif isinstance(val, MetaBool):
e = Span(Str(str(val.boolean)))
else:
raise TypeError(f"Cannot print value of metadatum '{e.content[0].text[1:]}' of type '{type(val)}'")
return e
return e
def transform_CodeBlock(e: CodeBlock) -> CodeBlock:
def transform_CodeBlock(self, e: CodeBlock) -> Union[CodeBlock, Div, Null]:
if "markdown" in e.classes and "group" in e.classes:
includedDoc = import_md(e.text)
return self.create_Group(*includedDoc.content, context=Context(includedDoc, self.context.path, self.context, self.context.trusted))
return self.create_Group(*includedDoc.content, new_context=Context(includedDoc, self.context.path, self.context, self.context.trusted))
if "python" in e.classes and "run" in e.classes:
if not self.context.trusted:
return nullify(e)
command_output = parse_command(e.text)(BlockCommand(), self.context)
e = BlockCommand().replaceSelf(*([] if command_output is None else command_output))
return self.transform(e)
if "python" in e.classes and ("define" in e.attributes or "redefine" in e.attributes):
if not self.context.trusted:
return nullify(e)
return handle_command_define(e, self.context)
# Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks
# FIXME? Passing down attributes explicitly no longer needed, because OG now has access to Context.
if not "highlight" in e.attributes:
e.attributes["highlight"] = self.context.get_metadata("highlight") if self.context.get_metadata("highlight") is not None else True
if not "style" in e.attributes:
e.attributes["style"] = self.context.get_metadata("highlight-style") if self.context.get_metadata("highlight-style") is not None else "default"
e.attributes["noclasses"] = False
# I think this is supposed to enable inline styles for highlighting when the style differs from the document, but it clearly doesn't work. a) HTML_generator never accesses it and b) Only the top-level document contains a style so you have to ask the top level context, not the current context.
else:
e.attributes["noclasses"] = True
return e
def transform_Command(self, e: Command) -> Union[Div, Span]:
    """Execute the command named by ``e.attributes["c"]`` and transform its output.

    The command callable is looked up in the current context, called with
    the command element and the context, and whatever it returns replaces
    the command element via ``e.replaceSelf``. The replacement is then run
    through ``self.transform`` again.

    Raises:
        NameError: if no command of that name is defined in the context.
    """
    command_name = e.attributes["c"]
    # Look the command up once and reuse the result for both the check and the call.
    command = self.context.get_command(command_name)
    if not command:
        raise NameError(f"Command not defined '{command_name}'.")
    command_output = command(e, self.context)
    # A command may return None (no output). Treat that as an empty result,
    # the same way the `python run` code block path does, instead of
    # crashing on `*None`.
    e = e.replaceSelf(*([] if command_output is None else command_output))
    return self.transform(e)
def transform_InlineCommand(self, e: InlineCommand) -> Span:
return self.transform_Command(e)
def transform_BlockCommand(self, e: BlockCommand) -> Div:
return self.transform_Command(e)
def transform_Whitespace(e: Whitespace) -> Whitespace:
def transform_Whitespace(self, e: Whitespace) -> Whitespace:
if bavlna(e, self.context):
return NBSP()
else:
return e
def transform_SoftBreak(e: SoftBreak) -> SoftBreak:
def transform_SoftBreak(self, e: SoftBreak) -> Whitespace:
return self.transform_Whitespace(e)
def transform_Space(e: Space) -> Space:
def transform_Space(self, e: Space) -> Whitespace:
return self.transform_Whitespace(e)
def transform_NBSP(e: NBSP) -> NBSP:
def transform_NBSP(self, e: NBSP) -> NBSP:
return e
def transform_Str(e: Str) -> Str:
def transform_Str(self, e: Str) -> Str:
return e
def transform_RawInline(e: RawInline) -> RawInline:
def transform_RawInline(self, e: RawInline) -> RawInline:
return e
def transform_Math(e: Math) -> Math:
def transform_Math(self, e: Math) -> Math:
return e
def transform_LineBreak(e: LineBreak) -> LineBreak:
def transform_LineBreak(self, e: LineBreak) -> LineBreak:
return e
def transform_Code(e: Code) -> Code:
def transform_Code(self, e: Code) -> Code:
return e
def transform_RawBlock(e: RawBlock) -> RawBlock:
def transform_RawBlock(self, e: RawBlock) -> RawBlock:
return e
def transform_Null(e: Null) -> Null:
def transform_Null(self, e: Null) -> Null:
return e
def transform_HorizontalRule(e: HorizontalRule) -> HorizontalRule:
def transform_HorizontalRule(self, e: HorizontalRule) -> HorizontalRule:
return e

13
test/test.md

@ -38,7 +38,7 @@ context.set_flag("cat", True)
context.set_metadata("a", {})
context.set_metadata("a.b", {})
context.set_metadata("a.b.c", "Bruh **bruh** bruh")
return [*parse_string("The main document's title is "), fe.FQuoted(*parse_string(context.get_metadata('title')), style="simple"), pf.Str(".")]
return [*parse_string("The main document's title is "), pf.Quoted(*parse_string(context.get_metadata('title')), quote_type="SingleQuote"), pf.Str(".")]
```
```python {style=native}
@ -115,6 +115,17 @@ $$
---
In this text, there might be some phrases [v češtině]{.group language=cs} and <!-- Tohle nefunguje :( -->
maybe even
:::{.group language=cs}
celé pasáže textu v češtině.
Růžový bagr bez zeleného bagru se žlutým bagrem.
Yay můžeme mít izolované kontexty bez opakovaného volání pandocu jupí!
:::
This should be seen by all.^[This is a footnote]
| Matematicko-fyzikální fakulta University Karlovy

Loading…
Cancel
Save