Source code for timApp.printing.documentprinter

"""
Functions for calling pandoc and constructing the calls
"""
import os
import re
import subprocess
import tempfile
from pathlib import Path

from flask import current_app
from pypandoc import _as_unicode, _validate_formats
from pypandoc.py3compat import string_types, cast_bytes

from timApp.auth.accesshelper import has_view_access
from timApp.auth.sessioninfo import get_current_user_object
from timApp.document.docentry import DocEntry
from timApp.document.docinfo import DocInfo
from timApp.document.docparagraph import (
    DocParagraph,
    add_heading_numbers,
    add_headings_to_counters,
)
from timApp.document.docsettings import DocSettings
from timApp.document.document import dereference_pars, Document
from timApp.document.macroinfo import MacroInfo
from timApp.document.post_process import process_areas
from timApp.document.preloadoption import PreloadOption
from timApp.document.randutils import hashfunc
from timApp.document.specialnames import TEMPLATE_FOLDER_NAME, PRINT_FOLDER_NAME
from timApp.document.usercontext import UserContext
from timApp.document.viewcontext import default_view_ctx, copy_of_default_view_ctx
from timApp.document.yamlblock import strip_code_block
from timApp.folder.folder import Folder
from timApp.markdown.autocounters import AutoCounters
from timApp.markdown.markdownconverter import (
    expand_macros,
    create_environment,
    TimSandboxedEnvironment,
)
from timApp.plugin.plugin import get_value, PluginWrap
from timApp.plugin.plugin import parse_plugin_values_macros
from timApp.plugin.pluginControl import pluginify
from timApp.plugin.pluginOutputFormat import PluginOutputFormat
from timApp.plugin.pluginexception import PluginException
from timApp.printing.printeddoc import PrintedDoc
from timApp.printing.printsettings import PrintFormat
from timApp.timdb.dbaccess import get_files_path
from timApp.user.user import User
from timApp.util.utils import cache_folder_path
from tim_common.html_sanitize import sanitize_html

DEFAULT_PRINTING_FOLDER = cache_folder_path / "printed_documents"
TEMPLATES_FOLDER = Path(TEMPLATE_FOLDER_NAME) / PRINT_FOLDER_NAME
TEX_MACROS_KEY = "texmacros"

REGSLIDESEP = re.compile("^-{3,}$")  # slide separator


[docs]class PrintingError(Exception):
    pass


[docs]class LaTeXError(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


[docs]def add_nonumber(md: str) -> str:
    r"""
    Adds {.unnumbered} after every heading line that starts with #
        Special cases:
            - many # lines in same md
                - before #-line there must be at least two cr
                - split between two cr
            - line starting with # may continue by ordinary line
                - the unnumbered must be added before first cr
            - line starting with # may continue next line and have \ at the end
                 - undefined

    :param md: markdown to be converted
    :return: markdown with headings marked as unnumbered
    """
    mds = md.split("\n\n")
    result = ""
    for m in mds:
        if m.startswith("#"):
            ms = m.split("\n")
            if not ms[0].endswith("\\"):
                ms[0] += "{.unnumbered}"
            m = "\n".join(ms)
        result += m + "\n\n"

    return result


[docs]def get_tex_settings_and_macros(
    d: Document,
    user_ctx: UserContext,
    template_doc: DocEntry | None = None,
    tformat: PrintFormat = PrintFormat.PLAIN,
):
    settings = d.get_settings()
    pdoc_plugin_attrs = settings.global_plugin_attrs()
    pdoc_macroinfo = settings.get_macroinfo(default_view_ctx, user_ctx)
    pdoc_macro_delimiter = pdoc_macroinfo.get_macro_delimiter()
    pdoc_macros = pdoc_macroinfo.get_macros()
    if tformat == PrintFormat.LATEX:
        pdoc_macros["tex"] = True
    pdoc_macro_env = create_environment(
        pdoc_macro_delimiter,
        user_ctx,
        default_view_ctx,
        pdoc_macros,
        d,
    )

    if template_doc:
        template_settings = template_doc.document.get_settings()
        pdoc_macros.update(
            template_settings.get_texmacroinfo(default_view_ctx).get_macros()
        )

    pdoc_macros.update(settings.get_texmacroinfo(default_view_ctx).get_macros())

    return (
        settings,
        pdoc_plugin_attrs,
        pdoc_macro_env,
        pdoc_macros,
        pdoc_macro_delimiter,
    )


[docs]def get_tex_macros(d: Document):
    settings = d.get_settings()
    # texmacros = settings.get_texmacroinfo(default_view_ctx).get_macros()
    texmacros = settings.get_setting_or_default("texmacros", {})

    return texmacros


[docs]class DocumentPrinter:
    def __init__(
        self,
        doc_entry: DocInfo,
        template_to_use: DocInfo | None,
        urlroot: str,
    ):
        self._doc_entry = doc_entry
        self._template_to_use = template_to_use
        self._content = None
        self._print_hash = None
        self._macros = {}
        self.texplain = False
        self.textplain = False
        self.texfiles = None
        self.urlroot = urlroot

[docs]    def get_template_id(self) -> int | None:
        if self._template_to_use:
            return self._template_to_use.id
        return None

[docs]    def get_content(
        self,
        user_ctx: UserContext,
        plugins_user_print: bool = False,
        target_format: PrintFormat = PrintFormat.PLAIN,
    ) -> str:
        """
        Gets the content of the DocEntry assigned for this DocumentPrinter object.
        Fetches the markdown for the documents paragraphs, checks whether the
        paragraph should be printed determined by a boolean 'print'-attribute,
        and returns the markdown for all the paragraphs that should be printed.

        Returns the (markdown) contents of the file as a single string, as that's the
        format pypandoc likes to handle.

        :return: The TIM documents contents in markdown format. Excludes the paragraphs that have attribute
                 print="false"
        """

        tformat = target_format
        if target_format in (PrintFormat.PDF, PrintFormat.JSON):
            tformat = PrintFormat.LATEX

        if self._content is not None:
            return self._content

        (
            settings,
            _,
            pdoc_macro_env,
            pdoc_macros,
            pdoc_macro_delimiter,
        ) = get_tex_settings_and_macros(
            self._doc_entry.document, user_ctx, self._template_to_use, tformat
        )

        self._macros = pdoc_macros

        # TODO: tries to change soft hyphens to LaTeX \-
        #       but Pandoc removes the \???
        """
        if tformat == PrintFormat.LATEX:
            charmacros = settings.get_charmacros() if settings else {}
            charmacros = charmacros | {"&shy;": "\\-"}
            if settings:
                settings.set_charmacros(charmacros)
        """
        # Remove paragraphs that are not to be printed and replace plugin pars,
        # that have a defined 'texprint' block in their yaml, with the 'texprint'-blocks content
        pars = self._doc_entry.document.get_paragraphs(include_preamble=True)
        self._doc_entry.document.preload_option = PreloadOption.all
        pars = dereference_pars(
            pars, context_doc=self._doc_entry.document, view_ctx=default_view_ctx
        )
        pars_to_print = []
        self.texplain = settings.is_texplain()
        self.textplain = settings.is_textplain()

        self.texfiles = (
            settings.get_texmacroinfo(default_view_ctx).get_macros().get("texfiles")
        )
        if self.texfiles and self.texfiles is str:
            self.texfiles = [self.texfiles]

        texmacros = get_tex_macros(self._doc_entry.document)

        view_ctx = default_view_ctx
        if texmacros:
            view_ctx = copy_of_default_view_ctx(texmacros)
        if tformat == PrintFormat.LATEX:  # ensure tex macro is set
            view_ctx = copy_of_default_view_ctx({"tex": True})

        # Process areas to determine what is visible to the user who is printing
        # TODO: We don't need to process all the areas, just need to find the IDs of the visible items
        processed_par_ids = {
            p.target_data.id
            for p in process_areas(
                settings,
                pars,
                pdoc_macros,
                pdoc_macro_delimiter,
                pdoc_macro_env,
                view_ctx,
                use_md=True,
                cache=False,
            )
        }

        par_infos: [  # TODO: Why this was list[]
            tuple[
                DocParagraph,
                DocSettings,
                dict,
                TimSandboxedEnvironment,
                dict[str, object],
                str,
            ]
        ] = []
        for par in pars:

            # do not print document settings pars
            if par.is_setting():
                continue

            if par.id not in processed_par_ids:
                continue

            p_info = par, *get_tex_settings_and_macros(
                par.doc, user_ctx, self._template_to_use, tformat
            )
            _, _, pdoc_plugin_attrs, env, pdoc_macros, pdoc_macro_delimiter = p_info

            if self.texplain or self.textplain:
                if par.get_markdown().find("#") == 0:
                    continue

            if par.has_class("hidden-print"):
                continue

            ppar = par
            # Replace plugin- and question pars with regular docpars with the md defined in the 'print' block
            # of their yaml as the md content of the replacement par
            if par.is_plugin():
                try:
                    plugin_yaml = parse_plugin_values_macros(
                        par=par,
                        global_attrs=pdoc_plugin_attrs,
                        macros=pdoc_macros,
                        env=env,
                    )
                except PluginException:
                    plugin_yaml = {}
                plugin_yaml_beforeprint = get_value(plugin_yaml, "texbeforeprint")
                if plugin_yaml_beforeprint is not None:
                    bppar = DocParagraph.create(
                        doc=self._doc_entry.document, md=plugin_yaml_beforeprint
                    )
                    par_infos.append(p_info)
                    pars_to_print.append(bppar)

                plugin_yaml_print = get_value(plugin_yaml, "texprint")
                if plugin_yaml_print is not None:
                    ppar = DocParagraph.create(
                        doc=self._doc_entry.document, md=plugin_yaml_print
                    )
                par_infos.append(p_info)
                pars_to_print.append(ppar)

                plugin_yaml_afterprint = get_value(plugin_yaml, "texafterprint")
                if plugin_yaml_afterprint is not None:
                    appar = DocParagraph.create(
                        doc=self._doc_entry.document, md=plugin_yaml_afterprint
                    )
                    par_infos.append(p_info)
                    pars_to_print.append(appar)

            else:
                par_infos.append(p_info)
                pars_to_print.append(ppar)

        # render markdown for plugins
        presult = pluginify(
            doc=self._doc_entry.document,
            pars=pars_to_print,
            user_ctx=user_ctx,
            view_ctx=view_ctx,
            pluginwrap=PluginWrap.Nothing,
            output_format=PluginOutputFormat.MD,
            user_print=plugins_user_print,
            target_format=tformat,
        )
        pars_to_print = presult.pars

        export_pars = []

        # TODO: Instead, convert all paragraph classes into environments and always emit \begin-\end for them
        environment_classes = set(pdoc_macros.get("texenvironment_classes", []))

        # Get the markdown for each par dict
        for p, (
            _,
            settings,
            pdoc_plugin_attrs,
            pdoc_macro_env,
            pdoc_macros,
            pdoc_macro_delimiter,
        ) in zip(pars_to_print, par_infos):
            md = p.prepare(view_ctx, use_md=True).output
            if not p.is_plugin() and not p.is_question():
                if not p.get_nomacros() and not self.texplain and not self.textplain:
                    env = pdoc_macro_env
                    counters = env.counters
                    if counters:
                        counters.task_id = p.get_auto_id()
                        counters.is_plugin = p.is_plugin()
                    md = expand_macros(
                        text=md,
                        macros=pdoc_macros,
                        settings=settings,
                        env=pdoc_macro_env,
                        ignore_errors=False,
                    )
                classes = p.classes

                if classes:
                    endraw = ""
                    beginraw = ""
                    nonumber = ""
                    for cls in classes:
                        if cls == "visible-print":
                            continue
                        if cls == "nonumber":
                            nonumber = "{.unnumbered}"
                        else:
                            if target_format == "html":
                                beginraw += '<div class="' + cls + '">'
                                endraw += "</div>"
                            elif target_format == "plain":
                                beginraw = ""
                            else:
                                is_env = cls in environment_classes
                                raw_type = "RAWTEXENV" if is_env else "RAWTEX"
                                beginraw += raw_type + cls + "\n\n"
                                endraw += f"\n\nEND" + raw_type
                                if is_env:
                                    endraw += cls

                    if nonumber:
                        md = add_nonumber(md)
                    md = beginraw + md + endraw

                if self.texplain or self.textplain:
                    if md.startswith("```"):
                        md = md[3:-3]
                if (
                    not pdoc_macros.get("texautonumber")
                    and settings.auto_number_headings()
                ):
                    md = add_heading_numbers(
                        md,
                        p,
                        settings.heading_format(),
                        initial_heading_counts=settings.auto_number_start(),
                    )

                """
                if pd['md'].startswith('#'):
                    pd['md'] += ' {{ {} }}'.format(
                        ' '.join(['.{}'.format(class_name) for class_name in pd['attrs'].get('classes', [])]))
                pd['md'] = expand_macros(text=pd['md'],
                                         macros=pdoc_macros,
                                         macro_delimiter=pdoc_macro_delimiter,
                                         env=pdoc_macro_env,
                                         ignore_errors=True)
                """
            if md.find("§") >= 0:  # check if slide fragments
                md = md.replace("<§", "").replace("§>", "").replace("§§", "")
            if md.find("---") >= 0:  # check if slide separator
                if REGSLIDESEP.match(md):
                    continue
            export_pars.append(md)

        if self.texplain or self.textplain:
            # Paragraphs are separated by a blank line in the Markdown format.
            content = "\n\n".join(export_pars)
        else:
            content = settings.get_doctexmacros() + "\n" + "\n\n".join(export_pars)

        self._content = content
        return content

[docs]    def get_autocounters(
        self,
        user_ctx: UserContext,
    ) -> AutoCounters:
        """
        Gets the content of the DocEntry assigned for this
        DocumentPrinter object. Builds autonumber counters
        from %%"name"|c_????%% filters.

        :return: counters for autonumbering
        """

        (
            settings,
            _,
            pdoc_macro_env,
            pdoc_macros,
            pdoc_macro_delimiter,
        ) = get_tex_settings_and_macros(
            self._doc_entry.document, user_ctx, self._template_to_use
        )

        self._macros = pdoc_macros
        counters = pdoc_macro_env.get_counters()
        counters.set_renumbering(True)
        counters.set_auto_number_headings(
            self._doc_entry.document.get_settings().auto_number_headings()
        )

        # Remove paragraphs that are not to be printed and replace plugin pars,
        # that have a defined 'texprint' block in their yaml, with the 'texprint'-blocks content
        # TODO: Check if this needs to be checked also for autonumbering
        pars = self._doc_entry.document.get_paragraphs(include_preamble=True)
        self._doc_entry.document.preload_option = PreloadOption.all
        pars = dereference_pars(
            pars, context_doc=self._doc_entry.document, view_ctx=default_view_ctx
        )

        view_ctx = default_view_ctx

        for par in pars:
            counters.task_id = par.get_auto_id()
            # do not count document settings pars
            if par.is_setting():
                continue

            p_info = par, *get_tex_settings_and_macros(
                par.doc, user_ctx, self._template_to_use
            )
            _, _, pdoc_plugin_attrs, env, pdoc_macros, pdoc_macro_delimiter = p_info
            env.set_counters(counters)
            counters.par = par

            # Replace plugin- and question pars with regular docpars
            # with the md defined in the 'print' block
            # of their yaml as the md content of the replacement par
            if par.is_plugin():
                try:
                    plugin_yaml = parse_plugin_values_macros(
                        par=par,
                        global_attrs=pdoc_plugin_attrs,
                        macros=pdoc_macros,
                        env=env,
                    )
                except PluginException:
                    pass
                continue

            # Get the markdown
            p = par

            md = p.prepare(view_ctx, use_md=True).output

            classes = p.classes
            nonumber = False

            if classes:
                for cls in classes:
                    if cls == "nonumber":
                        md = add_nonumber(md)
                        nonumber = True
                    else:
                        pass

            jump_name = p.attrs.get("taskId", None)

            # TODO: Make counters also for nonumbered and no auto_number
            if settings.auto_number_headings() and not nonumber:
                md = add_heading_numbers(
                    md,
                    p,
                    settings.heading_format(),
                    settings.heading_ref_format(),
                    jump_name,
                    counters,
                    initial_heading_counts=settings.auto_number_start(),
                )
            else:
                add_headings_to_counters(md, jump_name, counters)

            if not p.is_plugin() and not p.is_question():
                if not p.get_nomacros():
                    md = expand_macros(
                        text=md,
                        macros=pdoc_macros,
                        settings=settings,
                        env=env,
                        ignore_errors=False,
                    )

        return counters

[docs]    def write_to_format(
        self,
        user_ctx: UserContext,
        target_format: PrintFormat,
        path: Path,
        plugins_user_print: bool = False,
        eol_type: str = "native",
    ):
        """
        Converts the document to latex and returns the converted document as a bytearray

        :param user_ctx: The user context.
        :param target_format: The target file format
        :param plugins_user_print: Whether or not to print user input from plugins (instead of default values)
        :param path:  filepath to write
        :param eol_type: EOL type. Allows same option as Pandoc (crlf, lf, native)
        :return: Converted document as bytearray
        """

        with tempfile.NamedTemporaryFile(suffix=".latex", delete=True) as template_file:

            if self._template_to_use:
                template_content = DocumentPrinter.parse_template_content(
                    doc_to_print=self._doc_entry, template_doc=self._template_to_use
                )
            else:
                template_content = "$body$\n"

            if template_content is None:
                raise PrintingError(
                    f"The content in the template document {self._template_to_use.path} is not valid."
                )

            top_level = "section"
            if re.search(
                "^\\\\documentclass\\[[^\n]*(book|report)\\}",
                template_content,
                flags=re.S,
            ):
                top_level = "chapter"

            src = self.get_content(
                user_ctx,
                plugins_user_print=plugins_user_print,
                target_format=target_format,
            )

            # see: https://regex101.com/r/latest
            # src = re.sub(r'\{width=[^ }]* +([^}]*scale=[^%]*%[^}]*\})',                         r'{\1', src)

            templbyte = bytearray(template_content, encoding="utf-8")
            # template_file.write(templbyte) # for some reason does not write small files
            with open(template_file.name, "wb") as f:
                f.write(templbyte)

            print_dir = os.path.dirname(os.path.realpath(__file__))
            filters = [
                os.path.join(print_dir, "pandoc_inlinestylesfilter.py"),
                os.path.join(print_dir, "pandoc_imagefilepathsfilter.py"),
                # os.path.join(print_dir, "pandoc_headernumberingfilter.py")  # handled already when making md
            ]

            ftop = self._macros.get("texforcetoplevel", None)
            if ftop:
                top_level = ftop

            from_format = "markdown"
            if self.texplain:
                from_format = "latex"
            if self.textplain:
                from_format = "latex"

            texfiles = None
            if self.texfiles:
                texfiles = []
                for texfile in self.texfiles:
                    if texfile.startswith("http"):
                        texfiles.append(texfile)
                    else:
                        if texfile.find("/") < 0:  # add path if missing
                            texfile = (
                                self._doc_entry.document.docinfo.location
                                + "/"
                                + texfile
                            )
                        texfiles.append(
                            self.urlroot
                            + texfile
                            + "?file_type=latex&template_doc_id=0"
                        )

            # TODO: add also variables from texpandocvariables document setting, but this may lead to security hole?
            try:
                tim_convert_text(
                    source=src,
                    from_format=from_format,
                    to=target_format.value,
                    outputfile=path.absolute().as_posix(),  # output_file.name,
                    extra_args=[
                        "--template=" + template_file.name,
                        "--variable=TTrue:1",
                        "--variable=T1:1",
                        "--top-level-division=" + top_level,
                        "--markdown-headings=atx",
                        '--metadata=pagetitle:""',
                        # '--verbose',  # this gives non UTF8 results sometimes
                        "-Mtexdocid=" + str(self._doc_entry.id),
                    ],
                    filters=filters,
                    texfiles=texfiles,
                    eol_type=eol_type,
                )
            except LaTeXError as ex:
                raise LaTeXError(ex.value)
            except Exception as ex:
                raise PrintingError(f"<pre>{sanitize_html(str(ex))}</pre>")

[docs]    def get_print_path(
        self, file_type: PrintFormat, plugins_user_print: bool = False
    ) -> Path:
        """
        Formulates the printing path for the given document

        :param file_type: File format for the output
        :param plugins_user_print: should print user answers
        :return:
        """

        print_hash = self.hash_doc_print(plugins_user_print=plugins_user_print)

        path = (
            DEFAULT_PRINTING_FOLDER
            / str(self._doc_entry.id)
            / str(self.get_template_id())
            / str(print_hash + "." + file_type.value)
        )

        return path

[docs]    @staticmethod
    def get_user_templates(doc_entry: DocEntry, current_user: User) -> list[DocEntry]:
        templates = []

        if doc_entry is None or current_user is None:
            raise PrintingError(
                "You need to supply both the DocEntry and User to fetch the printing templates."
            )

        path = os.path.join(
            current_user.get_personal_folder().get_full_path(), TEMPLATES_FOLDER
        )

        templates_folder = Folder.find_by_path(path)

        if templates_folder is not None and has_view_access(templates_folder):
            docs = templates_folder.get_all_documents()
            if docs is not None:
                for d in docs:
                    if has_view_access(d) and not re.search(
                        f"/{PRINT_FOLDER_NAME}/.*{TEMPLATE_FOLDER_NAME}/", d.name
                    ):
                        templates.append(d)

        return templates

[docs]    @staticmethod
    def get_all_templates(doc_entry: DocEntry, current_user: User) -> list[DocEntry]:
        templates = []

        if doc_entry is None or current_user is None:
            raise PrintingError(
                "You need to supply both the DocEntry and User to fetch the printing templates."
            )

        current_folder = doc_entry.parent
        while current_folder is not None:

            path = os.path.join(current_folder.get_full_path(), TEMPLATES_FOLDER)

            templates_folder = Folder.find_by_path(path)

            if templates_folder is not None and has_view_access(templates_folder):
                docs = templates_folder.get_all_documents()

                if docs is None:
                    continue

                for d in docs:
                    if has_view_access(d) and not re.search(
                        f"/{PRINT_FOLDER_NAME}/.*{TEMPLATE_FOLDER_NAME}/", d.name
                    ):
                        templates.append(d)

            current_folder = current_folder.parent

        return templates

[docs]    @staticmethod
    def get_templates_as_dict(doc_entry: DocEntry, current_user: User):
        settings = doc_entry.document.get_settings()
        tex_template = settings.get("texTemplate", "")
        try:
            user_templates = DocumentPrinter.get_user_templates(
                doc_entry=doc_entry, current_user=current_user
            )
            all_templates = DocumentPrinter.get_all_templates(
                doc_entry=doc_entry, current_user=current_user
            )
            if tex_template:
                all_templates.append(DocEntry.find_by_path(tex_template))
        except PrintingError as err:
            raise PrintingError(str(err))

        default_templates = list(set(all_templates) - set(user_templates))

        user_templates_list: list[DocInfo] = []
        for t in user_templates:
            user_templates_list.append(t)

        default_templates_list: list[DocInfo] = []
        for t in default_templates:
            default_templates_list.append(t)

        user_templates_list.sort(key=lambda x: x.title)
        default_templates_list.sort(key=lambda x: x.title)

        templates_list = user_templates_list + default_templates_list

        return templates_list

[docs]    @staticmethod
    def parse_template_content(template_doc: DocInfo, doc_to_print: DocEntry) -> str:
        pars = template_doc.document.get_paragraphs()

        pars = dereference_pars(
            pars, context_doc=template_doc.document, view_ctx=default_view_ctx
        )

        # attach macros from target document to template
        template_settings = template_doc.document.get_settings()
        doc_settings = doc_to_print.document.get_settings()

        macros = template_settings.get_macroinfo(default_view_ctx).get_macros()
        macros.update(template_settings.get_texmacroinfo(default_view_ctx).get_macros())
        macros.update(doc_settings.get_macroinfo(default_view_ctx).get_macros())
        macros.update(doc_settings.get_texmacroinfo(default_view_ctx).get_macros())

        out_pars = []
        macroinfo = MacroInfo(default_view_ctx, macro_map=macros)
        # go through doc pars to get all the template pars
        for par in pars:
            if par.get_attr("printing_template") is not None:
                exp_md = par.get_expanded_markdown(
                    macroinfo=macroinfo, ignore_errors=True
                )
                out_pars.append(strip_code_block(exp_md.strip()))

        return "\n\n".join(out_pars)

[docs]    def get_document_version_as_float(self) -> float:
        doc_v = self._doc_entry.document.get_latest_version().get_version()
        doc_v_fst, doc_v_snd = doc_v[0], doc_v[1]
        return doc_v_fst + doc_v_snd / 10

[docs]    def get_template_version_as_float(self) -> float | None:
        if self._template_to_use is None:
            return None

        doc_v = self._template_to_use.document.get_latest_version().get_version()
        doc_v_fst, doc_v_snd = doc_v[0], doc_v[1]
        return doc_v_fst + doc_v_snd / 10

[docs]    def hash_doc_print(self, plugins_user_print: bool = False) -> str:
        thash = ""
        if self._template_to_use:
            thash = self._template_to_use.last_modified
        content = (
            str(self._doc_entry.id)
            + " "
            + str(self._doc_entry.last_modified)
            + str(self.get_template_id())
            + " "
            + str(thash)
        )
        if plugins_user_print:
            content += str(plugins_user_print) + str(get_current_user_object().id)

        return hashfunc(content)

[docs]    def get_printed_document_path_from_db(
        self, file_type: PrintFormat, plugins_user_print: bool = False
    ) -> str | None:
        existing_print: PrintedDoc | None = (
            PrintedDoc.query.filter_by(
                doc_id=self._doc_entry.id,
                template_doc_id=self.get_template_id(),
                file_type=file_type.value,
                version=self.hash_doc_print(plugins_user_print=plugins_user_print),
            )
            .order_by(PrintedDoc.id.desc())
            .first()
        )
        if existing_print is None or not os.path.exists(existing_print.path_to_file):
            return None

        return existing_print.path_to_file


[docs]def number_lines(s: str, start: int = 1):
    lines = s.split("\n")
    i = start
    result = ""
    for line in lines:
        result += f"{i:3}: {line}\n"
        i += 1
    return result


# ------------------------ copied from pypandoc / Juho Vepsäläinen ---------------------------------
# Use own version because the original breaks if there are non-ASCII chars in error messages


[docs]def tim_convert_text(
    source,
    to,
    from_format,
    extra_args=(),
    encoding="utf-8",
    outputfile=None,
    filters=None,
    removethis=None,
    texfiles=None,
    eol_type="native",
):
    """Converts given `source` from `format` to `to`.

    :param str source: Unicode string or bytes (see encoding)
    :param str to: format into which the input should be converted; can be one of
            `pypandoc.get_pandoc_formats()[1]`
    :param str from_format: the format of the inputs; can be one of `pypandoc.get_pandoc_formats()[1]`
    :param list extra_args: extra arguments (list of strings) to be passed to pandoc
            (Default value = ())
    :param str encoding: the encoding of the input bytes (Default value = 'utf-8')
    :param str outputfile: output will be written to outfilename or the converted content
            returned if None (Default value = None)
    :param list filters: pandoc filters e.g. filters=['pandoc-citeproc']
    :param removethis: lines that contains this text are removed from genereted LaTeX file
    :param texfiles: what files need to copy
    :returns: converted string (unicode) or an empty string if an outputfile was given
    :param eol_type: EOL type to use. Allowed values are same as Pandoc (crlf, lf, native)
    :rtype: unicode

    :raises RuntimeError: if any of the inputs are not valid of if pandoc fails with an error
    :raises OSError: if pandoc is not found; make sure it has been installed and is available at
            path.
    """
    source = _as_unicode(source, encoding)
    return tim_convert_input(
        source,
        from_format,
        "string",
        to,
        extra_args=extra_args,
        outputfile=outputfile,
        filters=filters,
        removethis=removethis,
        texfiles=texfiles,
        eol_type=eol_type,
    )


[docs]def tim_convert_input(
    source,
    from_format,
    input_type,
    to,
    extra_args=(),
    outputfile=None,
    filters=None,
    removethis=None,
    texfiles=None,
    eol_type="native",
):
    pandoc_path = "/usr/bin/pandoc"
    stdout = ""

    from_format, to = _validate_formats(from_format, to, outputfile)
    is_pdf = outputfile and outputfile.find(".pdf") >= 0
    latex_file = outputfile

    # To get access to pandoc-citeproc when we use a included copy of pandoc,
    # we need to add the pypandoc/files dir to the PATH
    new_env = os.environ.copy()
    files_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files")
    new_env["PATH"] = (
        new_env.get(
            "PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
        )
        + os.pathsep
        + files_path
    )
    string_input = input_type == "string"
    new_env["TIM_HOST"] = current_app.config["TIM_HOST"]

    if is_pdf:
        latex_file = outputfile.replace(".pdf", ".latex")

    for texfile in texfiles or []:
        get_file(latex_file, texfile, new_env)

    if from_format == "latex":
        with open(latex_file, "w", encoding="utf-8") as f:
            if eol_type == "native":
                # Resolve the EOL type to the native one of the OS
                eol_type = "lf" if os.linesep == "\n" else "crlf"
            if eol_type == "crlf":
                f.write(source.replace("\n", "\r\n"))
            elif eol_type == "lf":
                f.write(source.replace("\r\n", "\n"))
            else:
                f.write(source)
    else:

        input_file = [source] if not string_input else []
        args = [pandoc_path, "--from=" + from_format, "--to=" + to]

        args += input_file

        if outputfile:
            args.append("--output=" + latex_file)

        args.extend(extra_args)
        args.append(f"--eol={eol_type}")

        # adds the proper filter syntax for each item in the filters list
        if filters is not None:
            if isinstance(filters, string_types):
                filters = filters.split()
            f = ["--filter=" + x for x in filters]
            args.extend(f)
        try:  # Hack because images in mmcqs is not found
            Path("/images").symlink_to(get_files_path() / "blocks/images")
        except:
            pass
        p = subprocess.Popen(
            args,
            stdin=subprocess.PIPE if string_input else None,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=new_env,
        )

        # something else than 'None' indicates that the process already terminated
        if not (p.returncode is None):
            raise RuntimeError(
                f'Pandoc died with exitcode "{p.returncode}" before receiving input: {p.stderr.read()}'
            )

        try:
            bsource = cast_bytes(source, encoding="utf-8")
        except (UnicodeDecodeError, UnicodeEncodeError):
            # assume that it is already a utf-8 encoded string
            bsource = source
        try:
            stdout, stderr = p.communicate(bsource if string_input else None)
        except OSError:
            # this is happening only on Py2.6 when pandoc dies before reading all
            # the input. We treat that the same as when we exit with an error...
            raise RuntimeError(
                'Pandoc died with exitcode "%s" during conversion.' % p.returncode
            )

        stdout = _decode_result(stdout)
        stderr = _decode_result(stderr)
        if stdout or stderr:
            raise RuntimeError(
                'Pandoc died with exitcode "%s" during conversion. \nSource=\n%s'
                % (stdout + stderr, number_lines(source))
            )

        with open(latex_file, encoding="utf-8") as r:
            lines = r.readlines()
        with open(latex_file, "w", encoding="utf-8") as f:
            for line in lines:
                if removethis:
                    if line.find(removethis) >= 0:
                        continue
                # line = line.replace(']{ ', ']{')  # correct "]{ %%" problem caused by Jinja 2 macros
                f.write(line)

    if is_pdf:
        p, stdout = run_latex(outputfile, latex_file, new_env, "")

    # if there is an outputfile, then stdout is likely empty!
    return stdout


def _decode_result(s):
    try:
        s = s.decode("utf-8")
    except UnicodeDecodeError:
        # this shouldn't happen: pandoc more or less garantees that the output is utf-8!
        # raise RuntimeError('Pandoc output was not utf-8.')
        # noinspection PyBroadException
        try:
            s = s.decode("iso-8859-15")
        except Exception:
            pass
    s = s.replace("\\n", "\n")
    return s


[docs]def run_latex(outputfile, latex_file, new_env, string_input):
    try:
        filedir = os.path.dirname(outputfile)
        args = [
            "latexmk",
            "-g",
            "-f",
            "-pdfxe",
            f"-output-directory={filedir}",  # '-file-line-error',
            "-interaction=nonstopmode",
            latex_file,
        ]
        p = subprocess.Popen(
            args,
            stdin=subprocess.PIPE if string_input else None,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=filedir,
            env=new_env,
        )

        # something else than 'None' indicates that the process already terminated
        if not (p.returncode is None):
            raise RuntimeError(
                f'LaTeX died with exitcode "{p.returncode}" before receiving input: {p.stderr.read()}'
            )

        stdout, stderr = p.communicate(None)
        stdout = _decode_result(stdout)
        stderr = _decode_result(stderr)
        # check if latex returned successfully
        if p.returncode != 0:
            # Find errors:
            i = stdout.find("\n!")  # find possible error line from normal output
            # i = stdout.find('\n'+ latex_file + ':') # find possible error line in file:line:error format
            line = ""
            if i >= 0:
                stdout = stdout[i:]
                stdout = re.sub("\n\\(/usr/.*[^\n]", "", stdout)
                stdout = stdout.replace(latex_file, "")
                i = stdout.find("/var/lib")
                if i >= 0:
                    stdout = stdout[: i - 3]
                i = stdout.find("\nl.")
                if i >= 0:
                    i2 = stdout.find(" ", i)
                    if i2 >= 0:
                        line = stdout[i + 3 : i2]
            raise LaTeXError(
                {"line": line, "latex": latex_file, "pdf": outputfile, "error": stdout}
                # 'LINE: %s\nLATEX:%s\nPDF:%s\nLaTeX died with exitcode "%s" during conversion: \n%s\n%s' %
                # (line, latex_file, outputfile, p.returncode, stdout, stderr)
            )
        return p, stdout
    except OSError as ex:
        # this is happening only on Py2.6 when pandoc dies before reading all
        # the input. We treat that the same as when we exit with an error...
        raise RuntimeError('LaTeX died with error "%s" during conversion.' % str(ex))


[docs]def get_file(latex_file, fileurl, new_env):
    filedir = os.path.dirname(latex_file)
    end = fileurl.find("?")
    if end < 0:
        end = len(fileurl)
    filename = fileurl[fileurl.rfind("/") + 1 : end]
    dot = filename.rfind(".")  # change last - to . if there is no dot at the end
    minus = filename.rfind("-")
    if dot < minus:
        filename = (
            filename[:minus] + "." + filename[minus + 1 :]
        )  # TODO: Remove this when all texfiles are changed and renamed
    args = ["wget", fileurl, "-O", filename]
    p = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=filedir, env=new_env
    )

    code = p.returncode
    if code is not None:
        raise RuntimeError(
            f'Get file died with exitcode "{code}" before receiving input: {p.stderr.read()}'
        )

    stdout, stderr = p.communicate()
    stdout = _decode_result(stdout)
    stderr = _decode_result(stderr)
    if p.returncode > 0:
        raise RuntimeError(
            f'Get file {fileurl} failed: "{p.returncode}": {stdout} {stderr}'
        )
TIM documentation

Source code for timApp.printing.documentprinter