# Everything NOPProcessor.transform() accepts and returns: a single element,
# a panflute ListContainer, or a plain list of either.
ELCl = Union[Element, ListContainer, list[Union[Element, ListContainer]]]


class NOPProcessor:
    """Identity tree walker that other processors build on.

    ``transform()`` dispatches on the *exact* type of the element through
    ``TYPE_DICT`` to a ``transform_<Type>`` method. The default handlers
    recurse into the element's children and return the element itself, so
    this class on its own leaves a document unchanged. Subclasses override
    individual ``transform_<Type>`` methods and/or extend
    ``get_pretransformers()`` / ``get_posttransformers()``.
    """

    # Exact element type -> bound handler; populated per instance in __init__
    # so that subclass overrides are picked up.
    TYPE_DICT: dict[type, Callable]

    class UnknownElementError(Exception):
        """An unknown Element has been passed to the NOPProcessor, probably because panflute introduced a new one."""
        # NOTE: this used to be an f-string, which Python does not store as
        # __doc__; a plain string literal is required for a docstring.
        pass

    def __init__(self):
        self.TYPE_DICT = {
            TableRow: self.transform_TableRow,
            TableCell: self.transform_TableCell,
            Caption: self.transform_Caption,
            Doc: self.transform_Doc,
            LineItem: self.transform_LineItem,
            ListItem: self.transform_ListItem,

            BlockQuote: self.transform_BlockQuote,
            BulletList: self.transform_BulletList,
            Citation: self.transform_Citation,
            CodeBlock: self.transform_CodeBlock,
            Definition: self.transform_Definition,
            DefinitionItem: self.transform_DefinitionItem,
            DefinitionList: self.transform_DefinitionList,
            Div: self.transform_Div,
            Figure: self.transform_Figure,
            Header: self.transform_Header,
            HorizontalRule: self.transform_HorizontalRule,
            LineBlock: self.transform_LineBlock,
            MetaBlocks: self.transform_MetaBlocks,
            MetaBool: self.transform_MetaBool,
            MetaInlines: self.transform_MetaInlines,
            MetaList: self.transform_MetaList,
            MetaMap: self.transform_MetaMap,
            MetaString: self.transform_MetaString,
            Null: self.transform_Null,
            OrderedList: self.transform_OrderedList,
            Para: self.transform_Para,
            Plain: self.transform_Plain,
            RawBlock: self.transform_RawBlock,
            Table: self.transform_Table,
            TableBody: self.transform_TableBody,
            TableFoot: self.transform_TableFoot,
            TableHead: self.transform_TableHead,
            Group: self.transform_Group,
            InlineGroup: self.transform_InlineGroup,
            BlockGroup: self.transform_BlockGroup,

            Cite: self.transform_Cite,
            Code: self.transform_Code,
            Emph: self.transform_Emph,
            Image: self.transform_Image,
            LineBreak: self.transform_LineBreak,
            Link: self.transform_Link,
            Math: self.transform_Math,
            Note: self.transform_Note,
            Quoted: self.transform_Quoted,
            RawInline: self.transform_RawInline,
            SmallCaps: self.transform_SmallCaps,
            SoftBreak: self.transform_SoftBreak,
            Space: self.transform_Space,
            Span: self.transform_Span,
            Str: self.transform_Str,
            Strikeout: self.transform_Strikeout,
            Strong: self.transform_Strong,
            Subscript: self.transform_Subscript,
            Superscript: self.transform_Superscript,
            Underline: self.transform_Underline,
            NBSP: self.transform_NBSP,
            FQuoted: self.transform_FQuoted,

            InlineCommand: self.transform_InlineCommand,
            BlockCommand: self.transform_BlockCommand,
            CodeCommand: self.transform_CodeCommand,
        }

    def get_pretransformers(self) -> list[Callable[[ELCl], ELCl]]:
        """Return callables applied, in order, before the type handler."""
        return []

    def get_posttransformers(self) -> list[Callable[[ELCl], ELCl]]:
        """Return callables applied, in order, after the type handler."""
        return []

    def transform(self, e: ELCl) -> ELCl:
        """Transform an element, a ListContainer or a list of them.

        Lists and ListContainers are processed item by item. A single
        element goes through the pretransformers, then the handler
        registered for its exact type, then the posttransformers.

        Raises:
            UnknownElementError: no handler is registered for ``type(e)``
                (checked after the pretransformers have run).
        """
        if isinstance(e, list):
            return self.transform_list(e)
        elif isinstance(e, ListContainer):
            return self.transform_ListContainer(e)

        for transformer in self.get_pretransformers():
            e = transformer(e)

        # Resolve the handler *before* calling it: wrapping the call itself
        # in `except KeyError` would misreport any KeyError raised inside a
        # handler (or a subclass override) as an unknown element type.
        handler = self.TYPE_DICT.get(type(e))
        if handler is None:
            raise self.UnknownElementError(type(e))
        e = handler(e)

        for transformer in self.get_posttransformers():
            e = transformer(e)

        return e

    def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]:
        """Transform every item of a plain list in place and return the list."""
        # Index-based on purpose: each slot is overwritten with its result.
        for i in range(len(e)):
            e[i] = self.transform(e[i])
        return e

    def transform_ListContainer(self, e: ListContainer) -> ListContainer:
        """Transform every item of a ListContainer in place and return it."""
        for i in range(len(e)):
            e[i] = self.transform(e[i])
        return e

    # ------------------------------------------------------------------
    # Containers whose children live in `.content`: recurse, return `e`.
    # ------------------------------------------------------------------

    def transform_TableRow(self, e: TableRow) -> TableRow:
        e.content = self.transform(e.content)
        return e

    def transform_TableCell(self, e: TableCell) -> TableCell:
        e.content = self.transform(e.content)
        return e

    def transform_Caption(self, e: Caption) -> Caption:
        e.content = self.transform(e.content)
        return e

    def transform_LineItem(self, e: LineItem) -> LineItem:
        e.content = self.transform(e.content)
        return e

    def transform_ListItem(self, e: ListItem) -> ListItem:
        e.content = self.transform(e.content)
        return e

    def transform_BlockQuote(self, e: BlockQuote) -> BlockQuote:
        e.content = self.transform(e.content)
        return e

    def transform_BulletList(self, e: BulletList) -> BulletList:
        e.content = self.transform(e.content)
        return e

    def transform_Citation(self, e: Citation) -> Citation:
        # panflute's Citation has no `content`; its children are the
        # `prefix` and `suffix` inline lists (see the panflute API docs).
        e.prefix = self.transform(e.prefix)
        e.suffix = self.transform(e.suffix)
        return e

    def transform_Definition(self, e: Definition) -> Definition:
        e.content = self.transform(e.content)
        return e

    def transform_DefinitionItem(self, e: DefinitionItem) -> DefinitionItem:
        # panflute's DefinitionItem has no `content`; its children are
        # `term` (inlines) and `definitions` (Definition elements).
        e.term = self.transform(e.term)
        e.definitions = self.transform(e.definitions)
        return e

    def transform_DefinitionList(self, e: DefinitionList) -> DefinitionList:
        e.content = self.transform(e.content)
        return e

    def transform_Header(self, e: Header) -> Header:
        e.content = self.transform(e.content)
        return e

    def transform_LineBlock(self, e: LineBlock) -> LineBlock:
        e.content = self.transform(e.content)
        return e

    def transform_MetaBlocks(self, e: MetaBlocks) -> MetaBlocks:
        e.content = self.transform(e.content)
        return e

    def transform_MetaBool(self, e: MetaBool) -> MetaBool:
        # Leaf value: panflute's MetaBool only stores `boolean`, it has no
        # `content` to recurse into.
        return e

    def transform_MetaInlines(self, e: MetaInlines) -> MetaInlines:
        e.content = self.transform(e.content)
        return e

    def transform_MetaList(self, e: MetaList) -> MetaList:
        e.content = self.transform(e.content)
        return e

    def transform_MetaMap(self, e: MetaMap) -> MetaMap:
        # MetaMap.content is a mapping (panflute DictContainer), which
        # transform() does not accept; transform each value instead.
        # Snapshot the items so the mapping is not iterated while written.
        for key, value in list(e.content.items()):
            e.content[key] = self.transform(value)
        return e

    def transform_MetaString(self, e: MetaString) -> MetaString:
        # Leaf value: panflute's MetaString only stores `text`, it has no
        # `content` to recurse into.
        return e

    def transform_OrderedList(self, e: OrderedList) -> OrderedList:
        e.content = self.transform(e.content)
        return e

    def transform_Para(self, e: Para) -> Para:
        e.content = self.transform(e.content)
        return e

    def transform_Plain(self, e: Plain) -> Plain:
        e.content = self.transform(e.content)
        return e

    def transform_TableBody(self, e: TableBody) -> TableBody:
        e.content = self.transform(e.content)
        return e

    def transform_TableFoot(self, e: TableFoot) -> TableFoot:
        e.content = self.transform(e.content)
        return e

    def transform_TableHead(self, e: TableHead) -> TableHead:
        e.content = self.transform(e.content)
        return e

    def transform_Group(self, e: Group) -> Group:
        e.content = self.transform(e.content)
        return e

    def transform_InlineGroup(self, e: InlineGroup) -> InlineGroup:
        e.content = self.transform(e.content)
        return e

    def transform_BlockGroup(self, e: BlockGroup) -> BlockGroup:
        e.content = self.transform(e.content)
        return e

    def transform_Cite(self, e: Cite) -> Cite:
        e.content = self.transform(e.content)
        return e

    def transform_Emph(self, e: Emph) -> Emph:
        e.content = self.transform(e.content)
        return e

    def transform_Link(self, e: Link) -> Link:
        e.content = self.transform(e.content)
        return e

    def transform_Note(self, e: Note) -> Note:
        e.content = self.transform(e.content)
        return e

    def transform_SmallCaps(self, e: SmallCaps) -> SmallCaps:
        e.content = self.transform(e.content)
        return e

    def transform_Strikeout(self, e: Strikeout) -> Strikeout:
        e.content = self.transform(e.content)
        return e

    def transform_Strong(self, e: Strong) -> Strong:
        e.content = self.transform(e.content)
        return e

    def transform_Subscript(self, e: Subscript) -> Subscript:
        e.content = self.transform(e.content)
        return e

    def transform_Superscript(self, e: Superscript) -> Superscript:
        e.content = self.transform(e.content)
        return e

    def transform_Underline(self, e: Underline) -> Underline:
        e.content = self.transform(e.content)
        return e

    def transform_FQuoted(self, e: FQuoted) -> FQuoted:
        e.content = self.transform(e.content)
        return e

    def transform_Figure(self, e: Figure) -> Figure:
        e.content = self.transform(e.content)
        e.caption = self.transform(e.caption)
        return e

    def transform_Table(self, e: Table) -> Table:
        # NOTE(review): the table caption is not visited here, unlike in
        # transform_Figure -- confirm whether that is intentional.
        e.head = self.transform(e.head)
        e.content = self.transform(e.content)
        e.foot = self.transform(e.foot)
        return e

    def transform_Doc(self, e: Doc) -> Doc:
        # Only the document body is walked; metadata is left untouched.
        e.content = self.transform(e.content)
        return e

    def transform_Quoted(self, e: Quoted) -> Quoted:
        e.content = self.transform(e.content)
        return e

    def transform_Image(self, e: Image) -> Image:
        e.content = self.transform(e.content)
        return e

    def transform_Div(self, e: Div) -> Div:
        e.content = self.transform(e.content)
        return e

    def transform_Span(self, e: Span) -> Span:
        e.content = self.transform(e.content)
        return e

    def transform_CodeBlock(self, e: CodeBlock) -> CodeBlock:
        return e

    # ------------------------------------------------------------------
    # Commands: the three concrete kinds share one overridable handler.
    # ------------------------------------------------------------------

    def transform_Command(self, e: Command) -> Command:
        e.content = self.transform(e.content)
        return e

    def transform_InlineCommand(self, e: InlineCommand) -> Span:
        return self.transform_Command(e)

    def transform_BlockCommand(self, e: BlockCommand) -> Div:
        return self.transform_Command(e)

    def transform_CodeCommand(self, e: CodeCommand) -> Div:
        return self.transform_Command(e)

    # ------------------------------------------------------------------
    # Whitespace: SoftBreak and Space share one overridable handler.
    # ------------------------------------------------------------------

    def transform_Whitespace(self, e: Whitespace) -> Whitespace:
        return e

    def transform_SoftBreak(self, e: SoftBreak) -> Whitespace:
        return self.transform_Whitespace(e)

    def transform_Space(self, e: Space) -> Whitespace:
        return self.transform_Whitespace(e)

    # ------------------------------------------------------------------
    # Leaves: nothing to recurse into, returned unchanged.
    # ------------------------------------------------------------------

    def transform_NBSP(self, e: NBSP) -> NBSP:
        return e

    def transform_Str(self, e: Str) -> Str:
        return e

    def transform_RawInline(self, e: RawInline) -> RawInline:
        return e

    def transform_Math(self, e: Math) -> Math:
        return e

    def transform_LineBreak(self, e: LineBreak) -> LineBreak:
        return e

    def transform_Code(self, e: Code) -> Code:
        return e

    def transform_RawBlock(self, e: RawBlock) -> RawBlock:
        return e

    def transform_Null(self, e: Null) -> Null:
        return e

    def transform_HorizontalRule(self, e: HorizontalRule) -> HorizontalRule:
        return e
def eprint(*args, **kwargs):
    """Print to standard error; all arguments are forwarded to print()."""
    print(*args, **kwargs, file=sys.stderr)


class PandocProcessor(NOPProcessor):
    """Rewrite a processed tree so it only contains stock panflute elements.

    formatitko-specific nodes (Group, InlineGroup, BlockGroup, FQuoted) are
    replaced by plain Div/Span/Quoted elements, Null becomes an empty Plain,
    and every attribute value is coerced to ``str`` after each element is
    handled.
    """

    def get_posttransformers(self) -> list[Callable[[ELCl], ELCl]]:
        """Extend the inherited posttransformers with attribute sanitizing."""
        return super().get_posttransformers() + [self.sanitize_attributes]

    def sanitize_attributes(self, e: ELCl) -> ELCl:
        """Convert every value in ``e.attributes`` to ``str``, in place.

        Elements without an ``attributes`` attribute are returned untouched.
        """
        if hasattr(e, "attributes"):
            # Only values of existing keys are reassigned, so the mapping
            # does not change size while it is being iterated.
            for key, value in e.attributes.items():
                e.attributes[key] = str(value)
        return e

    def transform_Group(self, e: Group) -> Div:
        """Replace a Group by a Div holding its transformed children."""
        e.content = self.transform(e.content)
        return Div(*e.content)

    def transform_InlineGroup(self, e: InlineGroup) -> Span:
        """Replace an InlineGroup by a Span holding its transformed children."""
        e.content = self.transform(e.content)
        return Span(*e.content)

    def transform_BlockGroup(self, e: BlockGroup) -> Div:
        """Replace a BlockGroup by a Div holding its transformed children."""
        e.content = self.transform(e.content)
        return Div(*e.content)

    def transform_Null(self, e: Null) -> Plain:
        """Replace a Null block by a Plain containing one empty Str."""
        return Plain(Str(""))

    def transform_FQuoted(self, e: FQuoted) -> Quoted:
        """Replace an FQuoted by a plain Quoted with transformed children."""
        # Recurse first, like the Group handlers above: otherwise
        # formatitko-only nodes nested inside the quotation would survive.
        e.content = self.transform(e.content)
        # NOTE(review): assumes FQuoted carries panflute's `quote_type` like
        # Quoted does -- confirm. If it does not, this falls back to
        # Quoted's default, which is what the previous code always produced.
        quote_type = getattr(e, "quote_type", "DoubleQuote")
        return Quoted(*e.content, quote_type=quote_type)
pass -class TransformProcessor: +class TransformProcessor(NOPProcessor): context: Union[Context, None] = None root_file_path: str root_highlight_style: str = "default" _command_modules: list[tuple[Union[dict[str, CommandCallable], ModuleType], str]] = [] - TYPE_DICT: dict[type, Callable] - def __init__(self, root_file_path: str): - self.root_file_path = root_file_path + class UnknownElementError(Exception): + "An unknown Element has been passed to the TransformProcessor, probably because panflute introduced a new one." + pass - self.TYPE_DICT = { - TableRow: self.transform_TableRow, - TableCell: self.transform_TableCell, - Caption: self.transform_Caption, - Doc: self.transform_Doc, - LineItem: self.transform_LineItem, - ListItem: self.transform_ListItem, - - BlockQuote: self.transform_BlockQuote, - BulletList: self.transform_BulletList, - Citation: self.transform_Citation, - CodeBlock: self.transform_CodeBlock, - Definition: self.transform_Definition, - DefinitionItem: self.transform_DefinitionItem, - DefinitionList: self.transform_DefinitionList, - Div: self.transform_Div, - Figure: self.transform_Figure, - Header: self.transform_Header, - HorizontalRule: self.transform_HorizontalRule, - LineBlock: self.transform_LineBlock, - MetaBlocks: self.transform_MetaBlocks, - MetaBool: self.transform_MetaBool, - MetaInlines: self.transform_MetaInlines, - MetaList: self.transform_MetaList, - MetaMap: self.transform_MetaMap, - MetaString: self.transform_MetaString, - Null: self.transform_Null, - OrderedList: self.transform_OrderedList, - Para: self.transform_Para, - Plain: self.transform_Plain, - RawBlock: self.transform_RawBlock, - Table: self.transform_Table, - TableBody: self.transform_TableBody, - TableFoot: self.transform_TableFoot, - TableHead: self.transform_TableHead, - Group: self.transform_Group, - InlineGroup: self.transform_InlineGroup, - BlockGroup: self.transform_BlockGroup, - - Cite: self.transform_Cite, - Code: self.transform_Code, - Emph: self.transform_Emph, 
- Image: self.transform_Image, - LineBreak: self.transform_LineBreak, - Link: self.transform_Link, - Math: self.transform_Math, - Note: self.transform_Note, - Quoted: self.transform_Quoted, - RawInline: self.transform_RawInline, - SmallCaps: self.transform_SmallCaps, - SoftBreak: self.transform_SoftBreak, - Space: self.transform_Space, - Span: self.transform_Span, - Str: self.transform_Str, - Strikeout: self.transform_Strikeout, - Strong: self.transform_Strong, - Subscript: self.transform_Subscript, - Superscript: self.transform_Superscript, - Underline: self.transform_Underline, - NBSP: self.transform_NBSP, - FQuoted: self.transform_FQuoted, - - InlineCommand: self.transform_InlineCommand, - BlockCommand: self.transform_BlockCommand, - CodeCommand: self.transform_CodeCommand - } + def __init__(self, root_file_path: str, *args, **kwargs): + self.root_file_path = root_file_path + super().__init__(*args, **kwargs) def add_command_module(self, module: Union[dict[str, CommandCallable], ModuleType], module_name: str=""): self._command_modules.append((module, module_name)) def get_pretransformers(self) -> list[Callable[[ELCl],ELCl]]: - return [self.handle_if_attribute, self.handle_ifnot_attribute] - - def get_posttransformers(self) -> list[Callable[[ELCl],ELCl]]: - return [] - - def transform(self, e: ELCl) -> ELCl: - if isinstance(e, list): - return self.transform_list(e) - elif isinstance(e, ListContainer): - return self.transform_ListContainer(e) - - for transformer in self.get_pretransformers(): - e = transformer(e) - - try: - e = self.TYPE_DICT[type(e)](e) - except KeyError: - raise UnknownElementError(type(e)) - - for transformer in self.get_posttransformers(): - e = transformer(e) - - return e - + return super().get_pretransformers()+[self.handle_if_attribute, self.handle_ifnot_attribute] def handle_if_attribute(self, e: ELCl) -> ELCl: # `if` attribute. Only show this element if flag is set. 
@@ -153,185 +62,6 @@ class TransformProcessor: return e - def transform_list(self, e: list[Union[Element, ListContainer]]) -> list[Union[Element, ListContainer]]: - for i in range(len(e)): - e[i] = self.transform(e[i]) - return e - - def transform_ListContainer(self, e: ListContainer) -> ListContainer: - for i in range(len(e)): - e[i] = self.transform(e[i]) - return e - - - def transform_TableRow(self, e: TableRow) -> TableRow: - e.content = self.transform(e.content) - return e - - def transform_TableCell(self, e: TableCell) -> TableCell: - e.content = self.transform(e.content) - return e - - def transform_Caption(self, e: Caption) -> Caption: - e.content = self.transform(e.content) - return e - - def transform_LineItem(self, e: LineItem) -> LineItem: - e.content = self.transform(e.content) - return e - - def transform_ListItem(self, e: ListItem) -> ListItem: - e.content = self.transform(e.content) - return e - - def transform_BlockQuote(self, e: BlockQuote) -> BlockQuote: - e.content = self.transform(e.content) - return e - - def transform_BulletList(self, e: BulletList) -> BulletList: - e.content = self.transform(e.content) - return e - - def transform_Citation(self, e: Citation) -> Citation: - e.content = self.transform(e.content) - return e - - def transform_Definition(self, e: Definition) -> Definition: - e.content = self.transform(e.content) - return e - - def transform_DefinitionItem(self, e: DefinitionItem) -> DefinitionItem: - e.content = self.transform(e.content) - return e - - def transform_DefinitionList(self, e: DefinitionList) -> DefinitionList: - e.content = self.transform(e.content) - return e - - def transform_Header(self, e: Header) -> Header: - e.content = self.transform(e.content) - return e - - def transform_LineBlock(self, e: LineBlock) -> LineBlock: - e.content = self.transform(e.content) - return e - - def transform_MetaBlocks(self, e: MetaBlocks) -> MetaBlocks: - e.content = self.transform(e.content) - return e - - def 
transform_MetaBool(self, e: MetaBool) -> MetaBool: - e.content = self.transform(e.content) - return e - - def transform_MetaInlines(self, e: MetaInlines) -> MetaInlines: - e.content = self.transform(e.content) - return e - - def transform_MetaList(self, e: MetaList) -> MetaList: - e.content = self.transform(e.content) - return e - - def transform_MetaMap(self, e: MetaMap) -> MetaMap: - e.content = self.transform(e.content) - return e - - def transform_MetaString(self, e: MetaString) -> MetaString: - e.content = self.transform(e.content) - return e - - def transform_OrderedList(self, e: OrderedList) -> OrderedList: - e.content = self.transform(e.content) - return e - - def transform_Para(self, e: Para) -> Para: - e.content = self.transform(e.content) - return e - - def transform_Plain(self, e: Plain) -> Plain: - e.content = self.transform(e.content) - return e - - def transform_TableBody(self, e: TableBody) -> TableBody: - e.content = self.transform(e.content) - return e - - def transform_TableFoot(self, e: TableFoot) -> TableFoot: - e.content = self.transform(e.content) - return e - - def transform_TableHead(self, e: TableHead) -> TableHead: - e.content = self.transform(e.content) - return e - - def transform_Group(self, e: Group) -> Group: - e.content = self.transform(e.content) - return e - - def transform_InlineGroup(self, e: InlineGroup) -> InlineGroup: - e.content = self.transform(e.content) - return e - - def transform_BlockGroup(self, e: BlockGroup) -> BlockGroup: - e.content = self.transform(e.content) - return e - - def transform_Cite(self, e: Cite) -> Cite: - e.content = self.transform(e.content) - return e - - def transform_Emph(self, e: Emph) -> Emph: - e.content = self.transform(e.content) - return e - - def transform_Link(self, e: Link) -> Link: - e.content = self.transform(e.content) - return e - - def transform_Note(self, e: Note) -> Note: - e.content = self.transform(e.content) - return e - - def transform_SmallCaps(self, e: SmallCaps) -> 
SmallCaps: - e.content = self.transform(e.content) - return e - - def transform_Strikeout(self, e: Strikeout) -> Strikeout: - e.content = self.transform(e.content) - return e - - def transform_Strong(self, e: Strong) -> Strong: - e.content = self.transform(e.content) - return e - - def transform_Subscript(self, e: Subscript) -> Subscript: - e.content = self.transform(e.content) - return e - - def transform_Superscript(self, e: Superscript) -> Superscript: - e.content = self.transform(e.content) - return e - - def transform_Underline(self, e: Underline) -> Underline: - e.content = self.transform(e.content) - return e - - def transform_FQuoted(self, e: FQuoted) -> FQuoted: - e.content = self.transform(e.content) - return e - - def transform_Figure(self, e: Figure) -> Figure: - e.content = self.transform(e.content) - e.caption = self.transform(e.caption) - return e - - def transform_Table(self, e: Table) -> Table: - e.head = self.transform(e.head) - e.content = self.transform(e.content) - e.foot = self.transform(e.foot) - return e - - def transform_Doc(self, e: Doc) -> Doc: if self.context is not None: raise DoubleDocError() @@ -531,55 +261,9 @@ class TransformProcessor: e = e.replaceSelf(*([] if command_output is None else command_output)) return self.transform(e) - def transform_InlineCommand(self, e: InlineCommand) -> Span: - return self.transform_Command(e) - - def transform_BlockCommand(self, e: BlockCommand) -> Div: - return self.transform_Command(e) - - def transform_CodeCommand(self, e: CodeCommand) -> Div: - return self.transform_Command(e) - def transform_Whitespace(self, e: Whitespace) -> Whitespace: if bavlna(e, self.context): return NBSP() else: return e - def transform_SoftBreak(self, e: SoftBreak) -> Whitespace: - return self.transform_Whitespace(e) - - def transform_Space(self, e: Space) -> Whitespace: - return self.transform_Whitespace(e) - - - def transform_NBSP(self, e: NBSP) -> NBSP: - return e - - def transform_Str(self, e: Str) -> Str: - return e 
- - def transform_RawInline(self, e: RawInline) -> RawInline: - return e - - def transform_Math(self, e: Math) -> Math: - return e - - def transform_LineBreak(self, e: LineBreak) -> LineBreak: - return e - - def transform_Code(self, e: Code) -> Code: - return e - - def transform_RawBlock(self, e: RawBlock) -> RawBlock: - return e - - def transform_Null(self, e: Null) -> Null: - return e - - def transform_HorizontalRule(self, e: HorizontalRule) -> HorizontalRule: - return e - - - -