NOTE: this patch was recovered from a copy whose newlines had been flattened.
Indentation is reconstructed (four spaces per level) and blank context lines
are restored only where the hunk header pins their position; the second
formatitko.tex hunk and the test-partial.md hunk still lack theirs. Verify
against the repository before applying.

diff --git a/formatitko.tex b/formatitko.tex
index e48135a..47c5605 100644
--- a/formatitko.tex
+++ b/formatitko.tex
@@ -1,9 +1,6 @@
 \input luatex85.sty
 \input ucwmac2.tex
-\ucwmodule{luaofs}
-\ucwmodule{link}
-\ucwmodule{verb}
-\parskip=3pt plus 2pt minus 1pt
+\parskip=5pt plus 3pt minus 2pt
 \parindent=0sp
 
 \def\strong#1{{%
@@ -17,23 +14,17 @@
 \def\superscript#1{\leavevmode\raise3pt\hbox{\fiverm#1}}
+\def\subscript#1{\leavevmode\lower1pt\hbox{\fiverm#1}}
 \newcount\fncount \fncount=1
 \def\fnmark{\superscript{\the\fncount}}
 \def\fn#1{\footnote\fnmark{#1}\advance\fncount by 1}
-\def\hA#1{{\parskip1em\settextsize{14}\bf #1}}
-\def\hB#1{{\parskip1em\settextsize{12}\bf #1}}
-\def\hC#1{{\parskip1em\settextsize{10}\bf #1}}
-\def\hD#1{{\parskip1em\settextsize{10}\bi #1}}
-\def\hr{{\vskip5pt\hrule\vskip5pt}}
+\def\section#1{{\parskip1em\settextsize{18}\bf #1}}
+\def\subsection#1{{\parskip1em\settextsize{16}\bf #1}}
+\def\subsubsection#1{{\parskip1em\settextsize{14}\bf #1}}
+\def\subsubsubsection#1{{\parskip1em\settextsize{12}\bf #1}}
+\def\subsubsubsubsection#1{{\parskip1em\settextsize{10}\bf #1}}
+\def\subsubsubsubsubsection#1{{\parskip1em\settextsize{10}\bi #1}}
 \long\def\blockquote#1{\vskip\lineskip\vskip\parskip\hbox{\vrule\hskip5pt\vbox{#1}}}
-\let\code\verbatim
-\let\codeblock\verbatim
-\def\subscript#1{\leavevmode\lower1pt\hbox{\fiverm#1}}
 \def\strikeout#1{FIXME: Strikeout not implemented}
 \def\underline#1{FIXME: Underline not implemented}
-\def\figure#1#2{\vskip5pt\centerline{#1}\centerline{#2}\vskip5pt}
-\def\figcaption#1{{\it #1}}
-\let\image\putimage
-\def\languagecs{} % KSP should define this to \cze probably
-\def\languageen{} % KSP should define this to \eng probably
diff --git a/src/formatitko/formatitko.py b/src/formatitko/formatitko.py
index e192c86..50b7685 100755
--- a/src/formatitko/formatitko.py
+++ b/src/formatitko/formatitko.py
@@ -15,6 +15,7 @@ from .output_generator import OutputGenerator
 from .html_generator import HTMLGenerator
 from .transform_processor import TransformProcessor
 from .pandoc_processor import PandocProcessor
+from .tex_generator import UCWTexGenerator
 
 from panflute import convert_text
 
@@ -49,13 +50,20 @@ def main():
     if args.output_html is not None:
         # Initialize KaTeX client (this runs the node app and connects to a unix socket)
         with KatexClient() as katexClient:
-            HTMLGenerator(open(args.output_html, "w"), katexClient, imageProcessor).generate(doc)
+            with open(args.output_html, "w") as file:
+                HTMLGenerator(file, katexClient, imageProcessor).generate(doc)
+
+    if args.output_tex is not None:
+        with open(args.output_tex, "w") as file:
+            UCWTexGenerator(file, imageProcessor).generate(doc)
 
     if args.output_md is not None:
-        open(args.output_md, "w").write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="markdown"))
+        with open(args.output_md, "w") as file:
+            file.write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="markdown"))
 
     if args.output_json is not None:
-        open(args.output_json, "w").write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="json"))
+        with open(args.output_json, "w") as file:
+            file.write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="json"))
 
     if args.debug:
         print("-----------------------------------")
diff --git a/src/formatitko/html_generator.py b/src/formatitko/html_generator.py
index 25f3305..36c96f9 100644
--- a/src/formatitko/html_generator.py
+++ b/src/formatitko/html_generator.py
@@ -243,7 +243,7 @@ class HTMLGenerator(OutputGenerator):
             "DisplayMath": True,
             "InlineMath": False
         }
-        self.writeln(self.katexClient.render(e.text, {"displayMode": formats[e.format]}))
+        self.writeraw(self.katexClient.render(e.text, {"displayMode": formats[e.format]}))
 
     def generate_RawInline(self, e: RawInline):
         if e.format == "html":
@@ -291,13 +291,13 @@ class HTMLGenerator(OutputGenerator):
             attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};"
         self.generate_simple_tag(e, attributes=attributes)
 
+    def generate_Cite(self, e: Cite):
+        self.generate_simple_tag(e, tag="a", attributes=self.common_attributes(e) | {"href": f"#ref-{e.citations[0].id}"})
+
     # These are also disabled in pandoc so they shouldn't appear in the AST at all.
     def generate_Citation(self, e: Citation):
         self.writeln("")
 
-    def generate_Cite(self, e: Cite):
-        self.generate_simple_tag(e, tag="a", attributes=self.common_attributes(e) | {"href": f"#ref-{e.citations[0].id}"})
-
     def generate_Definition(self, e: Definition):
         self.writeln("")
 
diff --git a/src/formatitko/output_generator.py b/src/formatitko/output_generator.py
index c84ec0e..61df1c6 100644
--- a/src/formatitko/output_generator.py
+++ b/src/formatitko/output_generator.py
@@ -16,7 +16,7 @@ class UnknownElementError(Exception):
     pass
 
 class OutputGenerator:
-    _at_start_of_line: bool
+    _empty_lines: int
     context: Union[Context, None]
     indent_level: int
     indent_str: str
@@ -29,7 +29,7 @@ class OutputGenerator:
         self.output_file = output_file
         self.indent_str = indent_str
         self.indent_level = initial_indent_level
-        self._at_start_of_line = True
+        self._empty_lines = 1
         self.context = None
 
         self.TYPE_DICT_MISC = {
@@ -136,29 +136,32 @@ class OutputGenerator:
     def indent_less(self):
         self.indent_level -= 1
 
-    def write(self, text: str):
-        if self._at_start_of_line:
+    def write(self, text: str=""):
+        if self._empty_lines > 0:
             self.output_file.write(self.indent())
         self.output_file.write(text)
-        self._at_start_of_line = False
+        self._empty_lines = 0
 
-    def writeln(self, text: str):
-        if not self._at_start_of_line:
+    def writeln(self, text: str=""):
+        if self._empty_lines == 0:
             self.output_file.write("\n")
         self.output_file.write(self.indent())
         self.output_file.write(text+"\n")
-        self._at_start_of_line = True
+        self._empty_lines = 1
 
-    def writeraw(self, text: str):
-        if not self._at_start_of_line:
+    def writeraw(self, text: str=""):
+        if self._empty_lines == 0:
             self.output_file.write("\n")
         self.output_file.write(text+"\n")
-        self._at_start_of_line = True
+        self._empty_lines = 1
 
-    def endln(self):
-        if not self._at_start_of_line:
+    def ensure_empty(self, n: int=1):
+        while self._empty_lines < n:
             self.output_file.write("\n")
-        self._at_start_of_line = True
+            self._empty_lines+=1
+
+    def endln(self):
+        self.ensure_empty(1)
 
     def start_tag(self, tag: str, attributes: dict[str,str]={}) -> str:
         return tag
diff --git a/src/formatitko/tex_generator.py b/src/formatitko/tex_generator.py
new file mode 100644
index 0000000..59f1572
--- /dev/null
+++ b/src/formatitko/tex_generator.py
@@ -0,0 +1,384 @@
+from panflute import Element, ListContainer, Inline, Block
+from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quoted, RawInline, SmallCaps, SoftBreak, Space, Span, Str, Strikeout, Strong, Subscript, Superscript, Underline
+from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead
+from panflute import TableRow, TableCell, Caption, Doc
+from panflute import MetaValue
+from typing import Union
+
+import os
+
+from .output_generator import OutputGenerator
+from .images import ImageProcessor
+
+from .whitespace import NBSP
+from .elements import FQuoted
+from .context import Group, InlineGroup, BlockGroup, Context
+from .util import inlinify
+
+class UCWTexGenerator(OutputGenerator):
+    """Generate plain TeX built on the UCW macros and formatitko.tex."""
+
+    imageProcessor: ImageProcessor
+    _bold: int
+    _italic: int
+
+    # Escapes applied by escape_special_chars() in a single pass. Chained
+    # str.replace() calls would re-escape the backslashes and braces that
+    # earlier replacements had just inserted.
+    _TEX_ESCAPES = str.maketrans({
+        "\\": r"\textbackslash{}",
+        "&": r"\&",
+        "%": r"\%",
+        "$": r"\$",
+        "#": r"\#",
+        "_": r"\_",
+        "{": r"\{",
+        "}": r"\}",
+        "~": r"\textasciitilde{}",
+        "^": r"\textasciicircum{}",
+        "\u00a0": "~", # We use unicode no-break spaces to force nbsp in output
+        # NOTE(review): the original literal was an invisible character,
+        # assumed to be U+200B ZERO WIDTH SPACE -- confirm.
+        "\u200b": "",
+    })
+
+    def __init__(self, output_file, imageProcessor: ImageProcessor, *args, **kwargs):
+        self.imageProcessor = imageProcessor
+        self._bold = 0
+        self._italic = 0
+        super().__init__(output_file, *args, **kwargs)
+
+    def escape_special_chars(self, text: str) -> str:
+        """Return *text* with TeX special characters escaped.
+
+        Every input character is translated exactly once, so the output of
+        one escape is never escaped again.
+        """
+        return text.translate(self._TEX_ESCAPES)
+
+    def generate(self, e: Union[Element, ListContainer]):
+        """Generate *e* unless its ``only`` attribute names another format."""
+        if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex":
+            return
+        super().generate(e)
+
+    def writepar(self, text: str):
+        """Write *text* on its own line, padded with empty lines on both sides."""
+        self.ensure_empty(2)
+        self.writeln(text)
+        self.ensure_empty(2)
+
+    def generate_Null(self, e: Null):
+        pass
+
+    def generate_LineBreak(self, e: LineBreak):
+        self.write(r"\\")
+        self.endln()
+
+    def generate_Para(self, e: Para):
+        self.ensure_empty(2)
+        self.generate(e.content)
+        self.ensure_empty(2)
+
+    def generate_HorizontalRule(self, e: HorizontalRule):
+        self.writepar(r"\vskip5pt\hrule\hfil\vskip5pt{}")
+
+    def generate_Doc(self, e: Doc):
+        """Write the macro inputs, the document body and the final bye command."""
+        self.writeln(r"\input ucwmac2.tex")
+        self.writeln(r"\ucwmodule{ofs}")
+        self.writeln(r"\ucwmodule{verb}")
+        self.writeln(r"\ucwmodule{link}")
+        self.writeln(r"\input formatitko.tex")
+        self.generate(e.content)
+        self.writeln(r"\bye")
+
+    def get_language_macro(self, lang: str):
+        """Return the hyphenation setup for *lang*, or "" for unknown languages."""
+        if lang == "cs":
+            return r"\chyph\lefthyphenmin=2\righthyphenmin=2{}"
+        elif lang == "sk":
+            return r"\shyph\lefthyphenmin=2\righthyphenmin=2{}"
+        elif lang == "en":
+            return r"\ehyph\lefthyphenmin=2\righthyphenmin=2{}"
+        else:
+            return ""
+
+    def generate_InlineGroup(self, e: InlineGroup):
+        self.write(r"{")
+        self.write(self.get_language_macro(self.context.get_metadata("lang")))
+        self.generate(e.content)
+        self.write(r"}")
+
+    def generate_BlockGroup(self, e: BlockGroup):
+        self.writeln(r"\begingroup")
+        self.indent_more()
+        self.writeln(self.get_language_macro(self.context.get_metadata("lang")))
+        self.generate(e.content)
+        self.indent_less()
+        self.writeln(r"\endgroup")
+
+    def generate_Header(self, e: Header):
+        self.ensure_empty(2)
+        # Level 1 -> \section, level 2 -> \subsection, ... (see formatitko.tex)
+        self.write("\\"+"sub"*(e.level-1)+"section{")
+        self.generate(e.content)
+        self.write(r"}")
+        self.ensure_empty(2)
+
+    def generate_Image(self, e: Image):
+        """Process the image into the cache and emit a putimage call for it."""
+        url = e.url
+
+        additional_args = self.get_image_processor_args(e.attributes)
+
+        # The directory of the current file, will also look for images there.
+        source_dir = self.context.dir
+
+        _, ext = os.path.splitext(url)
+        ext = ext[1:]
+
+        # Conversions between various formats.
+        if ext in ["pdf", "png", "jpeg"]:
+            # Even supported elements have to be 'converted' because the
+            # processing contains finding and moving them to the cache
+            # directory.
+            url = self.imageProcessor.process_image(url, ext, source_dir, **additional_args)
+        elif ext in ["svg"]:
+            url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args)
+        elif ext in ["epdf"]:
+            url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args)
+        elif ext in ["jpg"]:
+            url = self.imageProcessor.process_image(url, "jpeg", source_dir, **additional_args)
+        else:
+            url = self.imageProcessor.process_image(url, "pdf", source_dir, **additional_args)
+
+        url = self.imageProcessor.find_image(url, [self.imageProcessor.cache_dir])
+        width = ""
+        if "width" in e.attributes:
+            width = e.attributes["width"]
+            # 50% → 0.5\hsize
+            if width.endswith("%"):
+                # float() rather than int() so fractional percentages work too
+                width = str(float(width[:-1])/100) + "\\hsize"
+            width = "width " + width
+
+        if isinstance(e.parent.parent, Figure):
+            self.writeln(f'\\putimage{{{width}}}{{{url}}}')
+        else:
+            self.writepar(f'\\putimage{{{width}}}{{{url}}}')
+
+    def generate_Code(self, e: Code):
+        self.write(r"\verb`")
+        self.write(e.text)
+        self.write(r"`")
+
+    def generate_Figure(self, e: Figure):
+        self.ensure_empty(2)
+        self.writeln(r"\vskip5pt")
+        self.writeln(r"\centerline{")
+        self.indent_more()
+        self.generate(e.content)
+        self.indent_less()
+        self.writeln(r"}")
+        self.writeln(r"\centerline{")
+        self.indent_more()
+        self.generate(e.caption)
+        self.indent_less()
+        self.writeln(r"}")
+        self.writeln(r"\vskip5pt{}")
+        self.ensure_empty(2)
+
+    def generate_Emph(self, e: Emph):
+        # Nested inside bold text, italics must switch to the bold-italic font.
+        if self._bold > 0:
+            self.write(r"{\bi{}")
+        else:
+            self.write(r"{\I{}")
+        self._italic+=1
+        self.generate(e.content)
+        self._italic-=1
+        self.write(r"}")
+
+    def generate_Strong(self, e: Strong):
+        # Nested inside italic text, bold must switch to the bold-italic font.
+        if self._italic > 0:
+            self.write(r"{\bi{}")
+        else:
+            self.write(r"{\bf{}")
+        self._bold+=1
+        self.generate(e.content)
+        self._bold-=1
+        self.write(r"}")
+
+    def generate_Caption(self, e: Caption):
+        self.generate_Emph(e)
+
+    def generate_Math(self, e: Math):
+        if e.format == "DisplayMath":
+            self.ensure_empty(2)
+            self.writeraw("$$")
+            self.writeraw(e.text.strip())
+            self.writeraw("$$")
+            self.ensure_empty(2)
+        else:
+            self.write("$")
+            self.write(e.text)
+            self.write("$")
+
+    def generate_Note(self, e: Note):
+        self.write(r"\fn{")
+        self.generate(inlinify(e))
+        self.write(r"}")
+
+    def generate_Table(self, e: Table):
+        aligns = {
+            "AlignLeft": r"\quad#\quad\hfil",
+            "AlignRight": r"\quad\hfil#\quad",
+            "AlignCenter": r"\quad\hfil#\hfil\quad",
+            "AlignDefault": r"\quad#\quad\hfil"
+        }
+        self.writeln(r"\vskip1em")
+        self.writeln(r"\halign{\strut"+"&".join([aligns[col[0]] for col in e.colspec])+r"\cr")
+        self.indent_more()
+        self.generate(e.head.content)
+        self.writeln(r"\noalign{\hrule}")
+        self.generate(e.content[0].content)
+        self.writeln(r"\noalign{\hrule}")
+        self.generate(e.foot.content)
+        self.indent_less()
+        self.writeln("}")
+        self.writeln(r"\vskip1em")
+
+    def generate_TableRow(self, e: TableRow):
+        for cell in e.content:
+            if cell.colspan > 1:
+                self.write(r"\multispan"+str(cell.colspan)+"{} ")
+            self.generate(cell.content)
+            if cell.next:
+                self.write(" & ")
+        self.write(r"\cr")
+        self.endln()
+
+    def generate_RawInline(self, e: RawInline):
+        if e.format == "tex":
+            self.write(e.text)
+
+    def generate_RawBlock(self, e: RawBlock):
+        if e.format == "tex":
+            self.writeraw(e.text)
+
+    def generate_Plain(self, e: Plain):
+        self.generate(e.content)
+
+    def generate_Span(self, e: Span):
+        self.generate(e.content)
+
+    def generate_CodeBlock(self, e: CodeBlock):
+        self.writeln(r"\verbatim{")
+        self.writeraw(e.text)
+        self.writeln(r"}")
+
+    def generate_Div(self, e: Div):
+        self.generate(e.content)
+
+    def generate_LineBlock(self, e: LineBlock):
+        self.writeln()
+        self.generate(e.content)
+        self.writeln()
+
+    def generate_LineItem(self, e: LineItem):
+        self.generate(e.content)
+        if e.next:
+            self.write(r"\\")
+        self.endln()
+
+    def generate_BulletList(self, e: BulletList):
+        self.ensure_empty(2)
+        self.writeln(r"\list{o}")
+        self.indent_more()
+        self.generate(e.content)
+        self.indent_less()
+        self.write(r"\endlist")
+        self.ensure_empty(2)
+
+    def generate_OrderedList(self, e: OrderedList):
+        self.ensure_empty(2)
+        styles = {
+            "DefaultStyle": "n",
+            "Decimal": "n",
+            "LowerRoman": "i",
+            "UpperRoman": "I",
+            "LowerAlpha": "a",
+            "UpperAlpha": "A"
+        }
+        style = styles[e.style]
+        delimiters = {
+            "DefaultDelim": f"{style}.",
+            "Period": f"{style}.",
+            "OneParen": f"{style})",
+            "TwoParens": f"({style})"
+        }
+        style = delimiters[e.delimiter]
+        self.writeln(r"\list{"+style+r"}")
+        self.indent_more()
+        self.generate(e.content)
+        self.indent_less()
+        self.writeln(r"\endlist")
+        self.ensure_empty(2)
+
+    def generate_ListItem(self, e: ListItem):
+        self.endln()
+        self.write(r"\:")
+        self.generate(e.content)
+        self.endln()
+
+    def generate_BlockQuote(self, e: BlockQuote):
+        self.writeln(r"\blockquote{")
+        self.indent_more()
+        self.generate(e.content)
+        self.indent_less()
+        self.writeln(r"}")
+
+    def generate_Link(self, e: Link):
+        if len(e.content) == 1 and isinstance(e.content[0], Str) and e.content[0].text == e.url:
+            self.write(r"\url{")
+        else:
+            self.write(r"\linkurl{"+e.url+r"}{")
+        self.generate(e.content)
+        self.write(r"}") # }
+
+    def generate_Subscript(self, e: Subscript):
+        self.write(r"\subscript{")
+        self.generate(e.content)
+        self.write(r"}")
+
+    def generate_Superscript(self, e: Superscript):
+        self.write(r"\superscript{")
+        self.generate(e.content)
+        self.write(r"}")
+
+    def generate_simple_tag(self, e: Union[Element, None] = None, tag: str = "", attributes: Union[dict[str, str], None] = None, content: Union[ListContainer, Element, list[Union[Element, ListContainer]], str, None] = None, inline: Union[bool, None] = None):
+        print("UCWTexGenerator: generate_simple_tag is not supported in TeX output, called for: ", type(e))
+
+    # These are also disabled in pandoc so they shouldn't appear in the AST at all.
+    def generate_Citation(self, e: Citation):
+        self.writeln("% FIXME: Citations not implemented")
+
+    def generate_Cite(self, e: Cite):
+        self.writeln("% FIXME: Cites not implemented")
+
+    def generate_Definition(self, e: Definition):
+        self.writeln("% FIXME: Definitions not implemented")
+
+    def generate_DefinitionItem(self, e: DefinitionItem):
+        self.writeln("% FIXME: DefinitionItems not implemented")
+
+    def generate_DefinitionList(self, e: DefinitionList):
+        self.writeln("% FIXME: DefinitionLists not implemented")
+
+    def generate_Underline(self, e: Underline):
+        self.writeln("% FIXME: Underlines not implemented")
+
+    def generate_Strikeout(self, e: Strikeout):
+        self.writeln("% FIXME: Strikeouts not implemented")
diff --git a/test/test-files/test-partial.md b/test/test-files/test-partial.md
index db8ae8e..f1c9ab5 100644
--- a/test/test-files/test-partial.md
+++ b/test/test-files/test-partial.md
@@ -56,7 +56,7 @@
 $$
 $$
-![This is a figure, go figure...](logo.svg){width=25%}\
+![This is a figure, go figure...](logo.svg){width=25%}What
 ![This is a figure, go figure...](logo.pdf){width=50%}