Browse Source

První draft generátoru pro UCWTex. Ocením feedback od @jirikalvoda a @mj. #22

error-handling
Jan Černohorský 1 year ago
parent
commit
1b971ea3b4
  1. 25
      formatitko.tex
  2. 14
      src/formatitko/formatitko.py
  3. 8
      src/formatitko/html_generator.py
  4. 31
      src/formatitko/output_generator.py
  5. 360
      src/formatitko/tex_generator.py
  6. 2
      test/test-files/test-partial.md

25
formatitko.tex

@ -1,9 +1,6 @@
\input luatex85.sty \input luatex85.sty
\input ucwmac2.tex \input ucwmac2.tex
\ucwmodule{luaofs} \parskip=5pt plus 3pt minus 2pt
\ucwmodule{link}
\ucwmodule{verb}
\parskip=3pt plus 2pt minus 1pt
\parindent=0sp \parindent=0sp
\def\strong#1{{% \def\strong#1{{%
@ -17,23 +14,17 @@
\def\superscript#1{\leavevmode\raise3pt\hbox{\fiverm#1}} \def\superscript#1{\leavevmode\raise3pt\hbox{\fiverm#1}}
\def\subscript#1{\leavevmode\lower1pt\hbox{\fiverm#1}}
\newcount\fncount \newcount\fncount
\fncount=1 \fncount=1
\def\fnmark{\superscript{\the\fncount}} \def\fnmark{\superscript{\the\fncount}}
\def\fn#1{\footnote\fnmark{#1}\advance\fncount by 1} \def\fn#1{\footnote\fnmark{#1}\advance\fncount by 1}
\def\hA#1{{\parskip1em\settextsize{14}\bf #1}} \def\section#1{{\parskip1em\settextsize{18}\bf #1}}
\def\hB#1{{\parskip1em\settextsize{12}\bf #1}} \def\subsection#1{{\parskip1em\settextsize{16}\bf #1}}
\def\hC#1{{\parskip1em\settextsize{10}\bf #1}} \def\subsubsection#1{{\parskip1em\settextsize{14}\bf #1}}
\def\hD#1{{\parskip1em\settextsize{10}\bi #1}} \def\subsubsubsection#1{{\parskip1em\settextsize{12}\bf #1}}
\def\hr{{\vskip5pt\hrule\vskip5pt}} \def\subsubsubsubsection#1{{\parskip1em\settextsize{10}\bf #1}}
\def\subsubsubsubsubsection#1{{\parskip1em\settextsize{10}\bi #1}}
\long\def\blockquote#1{\vskip\lineskip\vskip\parskip\hbox{\vrule\hskip5pt\vbox{#1}}} \long\def\blockquote#1{\vskip\lineskip\vskip\parskip\hbox{\vrule\hskip5pt\vbox{#1}}}
\let\code\verbatim
\let\codeblock\verbatim
\def\subscript#1{\leavevmode\lower1pt\hbox{\fiverm#1}}
\def\strikeout#1{FIXME: Strikeout not implemented} \def\strikeout#1{FIXME: Strikeout not implemented}
\def\underline#1{FIXME: Underline not implemented} \def\underline#1{FIXME: Underline not implemented}
\def\figure#1#2{\vskip5pt\centerline{#1}\centerline{#2}\vskip5pt}
\def\figcaption#1{{\it #1}}
\let\image\putimage
\def\languagecs{} % KSP should define this to \cze probably
\def\languageen{} % KSP should define this to \eng probably

14
src/formatitko/formatitko.py

@ -15,6 +15,7 @@ from .output_generator import OutputGenerator
from .html_generator import HTMLGenerator from .html_generator import HTMLGenerator
from .transform_processor import TransformProcessor from .transform_processor import TransformProcessor
from .pandoc_processor import PandocProcessor from .pandoc_processor import PandocProcessor
from .tex_generator import UCWTexGenerator
from panflute import convert_text from panflute import convert_text
@ -49,13 +50,20 @@ def main():
if args.output_html is not None: if args.output_html is not None:
# Initialize KaTeX client (this runs the node app and connects to a unix socket) # Initialize KaTeX client (this runs the node app and connects to a unix socket)
with KatexClient() as katexClient: with KatexClient() as katexClient:
HTMLGenerator(open(args.output_html, "w"), katexClient, imageProcessor).generate(doc) with open(args.output_html, "w") as file:
HTMLGenerator(file, katexClient, imageProcessor).generate(doc)
if args.output_tex is not None:
with open(args.output_tex, "w") as file:
UCWTexGenerator(file, imageProcessor).generate(doc)
if args.output_md is not None: if args.output_md is not None:
open(args.output_md, "w").write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="markdown")) with open(args.output_md, "w") as file:
file.write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="markdown"))
if args.output_json is not None: if args.output_json is not None:
open(args.output_json, "w").write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="json")) with open(args.output_json, "w") as file:
file.write(convert_text(PandocProcessor().transform(doc), input_format="panflute", output_format="json"))
if args.debug: if args.debug:
print("-----------------------------------") print("-----------------------------------")

8
src/formatitko/html_generator.py

@ -243,7 +243,7 @@ class HTMLGenerator(OutputGenerator):
"DisplayMath": True, "DisplayMath": True,
"InlineMath": False "InlineMath": False
} }
self.writeln(self.katexClient.render(e.text, {"displayMode": formats[e.format]})) self.writeraw(self.katexClient.render(e.text, {"displayMode": formats[e.format]}))
def generate_RawInline(self, e: RawInline): def generate_RawInline(self, e: RawInline):
if e.format == "html": if e.format == "html":
@ -291,13 +291,13 @@ class HTMLGenerator(OutputGenerator):
attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};" attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};"
self.generate_simple_tag(e, attributes=attributes) self.generate_simple_tag(e, attributes=attributes)
def generate_Cite(self, e: Cite):
self.generate_simple_tag(e, tag="a", attributes=self.common_attributes(e) | {"href": f"#ref-{e.citations[0].id}"})
# These are also disabled in pandoc so they shouldn't appear in the AST at all. # These are also disabled in pandoc so they shouldn't appear in the AST at all.
def generate_Citation(self, e: Citation): def generate_Citation(self, e: Citation):
self.writeln("<!-- FIXME: Citations not implemented -->") self.writeln("<!-- FIXME: Citations not implemented -->")
def generate_Cite(self, e: Cite):
self.generate_simple_tag(e, tag="a", attributes=self.common_attributes(e) | {"href": f"#ref-{e.citations[0].id}"})
def generate_Definition(self, e: Definition): def generate_Definition(self, e: Definition):
self.writeln("<!-- FIXME: Definitions not implemented -->") self.writeln("<!-- FIXME: Definitions not implemented -->")

31
src/formatitko/output_generator.py

@ -16,7 +16,7 @@ class UnknownElementError(Exception):
pass pass
class OutputGenerator: class OutputGenerator:
_at_start_of_line: bool _empty_lines: int
context: Union[Context, None] context: Union[Context, None]
indent_level: int indent_level: int
indent_str: str indent_str: str
@ -29,7 +29,7 @@ class OutputGenerator:
self.output_file = output_file self.output_file = output_file
self.indent_str = indent_str self.indent_str = indent_str
self.indent_level = initial_indent_level self.indent_level = initial_indent_level
self._at_start_of_line = True self._empty_lines = 1
self.context = None self.context = None
self.TYPE_DICT_MISC = { self.TYPE_DICT_MISC = {
@ -136,29 +136,32 @@ class OutputGenerator:
def indent_less(self): def indent_less(self):
self.indent_level -= 1 self.indent_level -= 1
def write(self, text: str): def write(self, text: str=""):
if self._at_start_of_line: if self._empty_lines > 0:
self.output_file.write(self.indent()) self.output_file.write(self.indent())
self.output_file.write(text) self.output_file.write(text)
self._at_start_of_line = False self._empty_lines = 0
def writeln(self, text: str): def writeln(self, text: str=""):
if not self._at_start_of_line: if self._empty_lines == 0:
self.output_file.write("\n") self.output_file.write("\n")
self.output_file.write(self.indent()) self.output_file.write(self.indent())
self.output_file.write(text+"\n") self.output_file.write(text+"\n")
self._at_start_of_line = True self._empty_lines = 1
def writeraw(self, text: str): def writeraw(self, text: str=""):
if not self._at_start_of_line: if self._empty_lines == 0:
self.output_file.write("\n") self.output_file.write("\n")
self.output_file.write(text+"\n") self.output_file.write(text+"\n")
self._at_start_of_line = True self._empty_lines = 1
def endln(self): def ensure_empty(self, n: int=1):
if not self._at_start_of_line: while self._empty_lines < n:
self.output_file.write("\n") self.output_file.write("\n")
self._at_start_of_line = True self._empty_lines+=1
def endln(self):
self.ensure_empty(1)
def start_tag(self, tag: str, attributes: dict[str,str]={}) -> str: def start_tag(self, tag: str, attributes: dict[str,str]={}) -> str:
return tag return tag

360
src/formatitko/tex_generator.py

@ -0,0 +1,360 @@
from panflute import Element, ListContainer, Inline, Block
from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, Quoted, RawInline, SmallCaps, SoftBreak, Space, Span, Str, Strikeout, Strong, Subscript, Superscript, Underline
from panflute import BlockQuote, BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Div, Figure, Header, HorizontalRule, LineBlock, LineItem, ListItem, MetaBlocks, MetaBool, MetaInlines, MetaList, MetaMap, MetaString, Null, OrderedList, Para, Plain, RawBlock, Table, TableBody, TableFoot, TableHead
from panflute import TableRow, TableCell, Caption, Doc
from panflute import MetaValue
from typing import Union
import os
from .output_generator import OutputGenerator
from .images import ImageProcessor
from .whitespace import NBSP
from .elements import FQuoted
from .context import Group, InlineGroup, BlockGroup, Context
from .util import inlinify
class UCWTexGenerator(OutputGenerator):
    """Output generator that writes the document tree as TeX source.

    The emitted file inputs ``ucwmac2.tex`` and ``formatitko.tex`` (see
    ``generate_Doc``) and uses macros such as ``\\list``, ``\\putimage``,
    ``\\linkurl`` and ``\\fn``, which are expected to be provided by those
    files.  Each ``generate_<Type>`` method renders one element type; the
    dispatch itself is done by ``OutputGenerator.generate``.
    """

    imageProcessor: ImageProcessor
    # Nesting depth of Strong / Emph elements currently being rendered; used
    # to switch to the bold-italic font when both are active at once.
    _bold: int
    _italic: int

    # Single-pass translation table for characters that are special in TeX.
    # Doing this in one pass matters: chained ``str.replace`` calls would
    # re-escape the backslashes and braces inserted by earlier replacements.
    _TEX_ESCAPES = str.maketrans({
        "&": r"\&",
        "%": r"\%",
        "$": r"\$",
        "#": r"\#",
        "_": r"\_",
        "{": r"\{",
        "}": r"\}",
        "~": r"\textasciitilde{}",
        "^": r"\textasciicircum{}",
        "\\": r"\textbackslash{}",
        # We use unicode no-break spaces to force nbsp in output
        "\u00a0": "~",
    })

    # Format passed to ``ImageProcessor.process_image`` for each source
    # extension.  Extensions not listed here are converted to PDF.
    _IMAGE_TARGET_FORMATS = {
        "pdf": "pdf",
        "png": "png",
        "jpeg": "jpeg",
        "svg": "pdf",
        "epdf": "pdf",
        "jpg": "jpeg",
    }

    def __init__(self, output_file, imageProcessor: ImageProcessor, *args, **kwargs):
        """Create a generator writing to ``output_file``.

        ``imageProcessor`` is used by ``generate_Image``; the remaining
        arguments are forwarded unchanged to ``OutputGenerator.__init__``.
        """
        self.imageProcessor = imageProcessor
        self._bold = 0
        self._italic = 0
        super().__init__(output_file, *args, **kwargs)

    def escape_special_chars(self, text: str) -> str:
        """Return ``text`` with TeX special characters escaped.

        Every special character is replaced exactly once, so the backslashes
        and braces that the replacements themselves contain are never escaped
        again.  No-break spaces (U+00A0) become ``~``.
        """
        text = text.translate(self._TEX_ESCAPES)
        # NOTE(review): the search string below appears empty in this view,
        # which makes the call a no-op; it presumably held an invisible
        # character that should be stripped -- confirm against the repository.
        text = text.replace("", "")
        return text

    def generate(self, e: Union[Element, ListContainer]):
        """Generate ``e`` unless it is marked for a different output format.

        Elements carrying an ``only`` attribute whose value is not ``"tex"``
        are skipped entirely.
        """
        if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex":
            return
        super().generate(e)

    def writepar(self, text: str):
        """Write ``text`` on its own line, surrounded by blank lines."""
        self.ensure_empty(2)
        self.writeln(text)
        self.ensure_empty(2)

    def generate_Null(self, e: Null):
        """Null elements produce no output."""
        pass

    def generate_LineBreak(self, e: LineBreak):
        """Emit a forced line break and end the current source line."""
        self.write(r"\\")
        self.endln()

    def generate_Para(self, e: Para):
        """Emit the paragraph content separated from its surroundings by blank lines."""
        self.ensure_empty(2)
        self.generate(e.content)
        self.ensure_empty(2)

    def generate_HorizontalRule(self, e: HorizontalRule):
        """Emit a horizontal rule with 5pt of vertical space on each side."""
        self.writepar(r"\vskip5pt\hrule\hfil\vskip5pt{}")

    def generate_Doc(self, e: Doc):
        """Emit the preamble, the whole document content and the final ``\\bye``."""
        preamble = (
            r"\input ucwmac2.tex",
            r"\ucwmodule{ofs}",
            r"\ucwmodule{verb}",
            r"\ucwmodule{link}",
            r"\input formatitko.tex",
        )
        for line in preamble:
            self.writeln(line)
        self.generate(e.content)
        self.writeln(r"\bye")

    def get_language_macro(self, lang: str):
        """Return the hyphenation setup for ``lang``.

        Known languages are ``"cs"``, ``"sk"`` and ``"en"``; anything else
        yields an empty string.
        """
        hyphenation = {"cs": r"\chyph", "sk": r"\shyph", "en": r"\ehyph"}
        # Guard against non-string (possibly unhashable) metadata values.
        if not isinstance(lang, str) or lang not in hyphenation:
            return ""
        return hyphenation[lang] + r"\lefthyphenmin=2\righthyphenmin=2{}"

    def generate_InlineGroup(self, e: InlineGroup):
        """Wrap inline content in a TeX group with the current language's hyphenation."""
        self.write(r"{")
        self.write(self.get_language_macro(self.context.get_metadata("lang")))
        self.generate(e.content)
        self.write(r"}")

    def generate_BlockGroup(self, e: BlockGroup):
        """Wrap block content in ``\\begingroup``/``\\endgroup`` with the current language's hyphenation."""
        self.writeln(r"\begingroup")
        self.indent_more()
        self.writeln(self.get_language_macro(self.context.get_metadata("lang")))
        self.generate(e.content)
        self.indent_less()
        self.writeln(r"\endgroup")

    def generate_Header(self, e: Header):
        """Emit a heading as ``\\section``, ``\\subsection``, ... according to ``e.level``."""
        self.ensure_empty(2)
        # Level 1 -> \section, level 2 -> \subsection, and so on.
        self.write("\\" + "sub" * (e.level - 1) + "section{")
        self.generate(e.content)
        self.write(r"}")
        self.ensure_empty(2)

    def generate_Image(self, e: Image):
        """Process the image through the image processor and emit ``\\putimage``.

        A ``width`` attribute given as a percentage is converted to a fraction
        of ``\\hsize`` (``50%`` becomes ``0.5\\hsize``); other values are
        passed through unchanged.
        """
        url = e.url
        additional_args = self.get_image_processor_args(e.attributes)

        # The directory of the current file, will also look for images there.
        source_dir = self.context.dir
        _, ext = os.path.splitext(url)
        ext = ext[1:]

        # Even directly supported formats have to be 'converted', because the
        # processing also finds the file and moves it to the cache directory.
        target_format = self._IMAGE_TARGET_FORMATS.get(ext, "pdf")
        url = self.imageProcessor.process_image(url, target_format, source_dir, **additional_args)
        url = self.imageProcessor.find_image(url, [self.imageProcessor.cache_dir])

        width = ""
        if "width" in e.attributes:
            width = e.attributes["width"]
            # 50% -> 0.5\hsize; float() so fractional percentages work too.
            if width.endswith("%"):
                width = str(float(width[:-1]) / 100) + "\\hsize"
            width = "width " + width

        command = f'\\putimage{{{width}}}{{{url}}}'
        if isinstance(e.parent.parent, Figure):
            # Inside a figure the image sits within \centerline{...}, so it
            # must not be surrounded by blank lines.
            self.writeln(command)
        else:
            self.writepar(command)

    def generate_Code(self, e: Code):
        """Emit inline code using ``\\verb`` with backtick delimiters."""
        self.write(r"\verb`")
        self.write(e.text)
        self.write(r"`")

    def generate_Figure(self, e: Figure):
        """Emit the figure content and its caption as two centered lines."""
        self.ensure_empty(2)
        self.writeln(r"\vskip5pt")
        for part in (e.content, e.caption):
            self.writeln(r"\centerline{")
            self.indent_more()
            self.generate(part)
            self.indent_less()
            self.writeln(r"}")
        self.writeln(r"\vskip5pt{}")
        self.ensure_empty(2)

    def generate_Emph(self, e: Emph):
        """Emit emphasized content; bold-italic when nested inside Strong."""
        font = r"{\bi{}" if self._bold > 0 else r"{\I{}"
        self.write(font)
        self._italic += 1
        self.generate(e.content)
        self._italic -= 1
        self.write(r"}")

    def generate_Strong(self, e: Strong):
        """Emit strong content; bold-italic when nested inside Emph."""
        font = r"{\bi{}" if self._italic > 0 else r"{\bf{}"
        self.write(font)
        self._bold += 1
        self.generate(e.content)
        self._bold -= 1
        self.write(r"}")

    def generate_Caption(self, e: Caption):
        """Captions are rendered the same way as emphasized text."""
        self.generate_Emph(e)

    def generate_Math(self, e: Math):
        """Emit math: display math on separate unindented lines, inline math in ``$...$``."""
        if e.format == "DisplayMath":
            self.ensure_empty(2)
            self.writeraw("$$")
            self.writeraw(e.text.strip())
            self.writeraw("$$")
            self.ensure_empty(2)
        else:
            self.write("$")
            self.write(e.text)
            self.write("$")

    def generate_Note(self, e: Note):
        """Emit a footnote through the ``\\fn`` macro, with its content inlinified."""
        self.write(r"\fn{")
        self.generate(inlinify(e))
        self.write(r"}")

    def generate_Table(self, e: Table):
        """Emit the table as an ``\\halign`` with rules after the head and after each body."""
        aligns = {
            "AlignLeft": r"\quad#\quad\hfil",
            "AlignRight": r"\quad\hfil#\quad",
            "AlignCenter": r"\quad\hfil#\hfil\quad",
            "AlignDefault": r"\quad#\quad\hfil"
        }
        self.writeln(r"\vskip1em")
        self.writeln(r"\halign{\strut"+"&".join([aligns[col[0]] for col in e.colspec])+r"\cr")
        self.indent_more()
        self.generate(e.head.content)
        self.writeln(r"\noalign{\hrule}")
        # A table may carry several bodies (or none); render all of them
        # instead of only the first one.
        for body in e.content:
            self.generate(body.content)
            self.writeln(r"\noalign{\hrule}")
        self.generate(e.foot.content)
        self.indent_less()
        self.writeln("}")
        self.writeln(r"\vskip1em")

    def generate_TableRow(self, e: TableRow):
        """Emit one ``\\halign`` row: cells joined by ``&`` and terminated by ``\\cr``."""
        for cell in e.content:
            if cell.colspan > 1:
                self.write(r"\multispan"+str(cell.colspan)+"{} ")
            self.generate(cell.content)
            # Separator only between cells, not after the last one.
            if cell.next:
                self.write(" & ")
        self.write(r"\cr")
        self.endln()

    def generate_RawInline(self, e: RawInline):
        """Pass raw inline content through verbatim, but only if its format is ``tex``."""
        if e.format == "tex":
            self.write(e.text)

    def generate_RawBlock(self, e: RawBlock):
        """Pass raw block content through unindented, but only if its format is ``tex``."""
        if e.format == "tex":
            self.writeraw(e.text)

    def generate_Plain(self, e: Plain):
        """Plain elements only contribute their content."""
        self.generate(e.content)

    def generate_Span(self, e: Span):
        """Span elements only contribute their content."""
        self.generate(e.content)

    def generate_CodeBlock(self, e: CodeBlock):
        """Emit the code block text, unindented, inside ``\\verbatim{...}``."""
        self.writeln(r"\verbatim{")
        self.writeraw(e.text)
        self.writeln(r"}")

    def generate_Div(self, e: Div):
        """Div elements only contribute their content."""
        self.generate(e.content)

    def generate_LineBlock(self, e: LineBlock):
        """Emit the line block between two ``writeln()`` calls."""
        self.writeln()
        self.generate(e.content)
        self.writeln()

    def generate_LineItem(self, e: LineItem):
        """Emit one line of a line block, followed by ``\\\\`` unless it is the last."""
        self.generate(e.content)
        if e.next:
            self.write(r"\\")
        self.endln()

    def generate_BulletList(self, e: BulletList):
        """Emit a bullet list as ``\\list{o}`` ... ``\\endlist``."""
        self.ensure_empty(2)
        self.writeln(r"\list{o}")
        self.indent_more()
        self.generate(e.content)
        self.indent_less()
        self.writeln(r"\endlist")
        self.ensure_empty(2)

    def generate_OrderedList(self, e: OrderedList):
        """Emit an ordered list as ``\\list{<style>}`` ... ``\\endlist``.

        The list style is built from the numbering style (``n``, ``i``, ``I``,
        ``a``, ``A``) combined with the delimiter (``n.``, ``n)``, ``(n)``).
        Numbering styles without a mapping fall back to decimal.
        """
        self.ensure_empty(2)
        styles = {
            "DefaultStyle": "n",
            "Decimal": "n",
            "LowerRoman": "i",
            "UpperRoman": "I",
            "LowerAlpha": "a",
            "UpperAlpha": "A"
        }
        style = styles.get(e.style, "n")
        delimiters = {
            "DefaultDelim": f"{style}.",
            "Period": f"{style}.",
            "OneParen": f"{style})",
            "TwoParens": f"({style})"
        }
        style = delimiters[e.delimiter]
        self.writeln(r"\list{"+style+r"}")
        self.indent_more()
        self.generate(e.content)
        self.indent_less()
        self.writeln(r"\endlist")
        self.ensure_empty(2)

    def generate_ListItem(self, e: ListItem):
        """Emit one list item introduced by ``\\:`` on a fresh line."""
        self.endln()
        self.write(r"\:")
        self.generate(e.content)
        self.endln()

    def generate_BlockQuote(self, e: BlockQuote):
        """Emit the quoted blocks as the argument of ``\\blockquote``."""
        self.writeln(r"\blockquote{")
        self.indent_more()
        self.generate(e.content)
        self.indent_less()
        self.writeln(r"}")

    def generate_Link(self, e: Link):
        """Emit a link: ``\\url{...}`` when the text equals the URL, ``\\linkurl`` otherwise."""
        shows_bare_url = (
            len(e.content) == 1
            and isinstance(e.content[0], Str)
            and e.content[0].text == e.url
        )
        if shows_bare_url:
            self.write(r"\url{")
        else:
            self.write(r"\linkurl{"+e.url+r"}{")
        self.generate(e.content)
        self.write(r"}")

    def generate_Subscript(self, e: Subscript):
        """Emit the content as the argument of ``\\subscript``."""
        self.write(r"\subscript{")
        self.generate(e.content)
        self.write(r"}")

    def generate_Superscript(self, e: Superscript):
        """Emit the content as the argument of ``\\superscript``."""
        self.write(r"\superscript{")
        self.generate(e.content)
        self.write(r"}")

    def generate_simple_tag(self, e: Union[Element, None] = None, tag: str = "", attributes: Union[dict[str, str], None] = None, content: Union[ListContainer, Element, list[Union[Element, ListContainer]], str, None] = None, inline: Union[bool, None] = None):
        """Report an element this generator has no TeX rendering for.

        Nothing is written to the output file; a warning naming the element
        type goes to stderr so that it cannot get mixed into regular output.
        """
        import sys
        print(f"warning: UCWTexGenerator has no TeX rendering for {type(e)}", file=sys.stderr)

    # These are also disabled in pandoc so they shouldn't appear in the AST at all.
    def generate_Citation(self, e: Citation):
        """Placeholder: emits a TeX comment only."""
        self.writeln("% FIXME: Citations not implemented")

    def generate_Cite(self, e: Cite):
        """Placeholder: emits a TeX comment only."""
        self.writeln("% FIXME: Cites not implemented")

    def generate_Definition(self, e: Definition):
        """Placeholder: emits a TeX comment only."""
        self.writeln("% FIXME: Definitions not implemented")

    def generate_DefinitionItem(self, e: DefinitionItem):
        """Placeholder: emits a TeX comment only."""
        self.writeln("% FIXME: DefinitionItems not implemented")

    def generate_DefinitionList(self, e: DefinitionList):
        """Placeholder: emits a TeX comment only."""
        self.writeln("% FIXME: DefinitionLists not implemented")

    def generate_Underline(self, e: Underline):
        """Placeholder: emits a TeX comment only."""
        self.writeln("% FIXME: Underlines not implemented")

    def generate_Strikeout(self, e: Strikeout):
        """Placeholder: emits a TeX comment only."""
        self.writeln("% FIXME: Strikeouts not implemented")

2
test/test-files/test-partial.md

@ -56,7 +56,7 @@ $$
$$ $$
![This is a figure, go figure...](logo.svg){width=25%}\ ![This is a figure, go figure...](logo.svg){width=25%}What
![This is a figure, go figure...](logo.pdf){width=50%} ![This is a figure, go figure...](logo.pdf){width=50%}

Loading…
Cancel
Save