Source code for timApp.admin.replace_in_documents
from argparse import ArgumentTypeError
from copy import copy
from dataclasses import dataclass
from typing import Generator
import attr
from yaml import YAMLError
from timApp.admin.search_in_documents import (
create_basic_search_argparser,
search,
SearchResult,
SearchArgumentsBasic,
)
from timApp.admin.util import (
process_items,
DryrunnableOnly,
BasicArguments,
get_url_for_match,
)
from timApp.document.docinfo import DocInfo
from timApp.document.docparagraph import DocParagraph
from timApp.document.viewcontext import default_view_ctx
from timApp.document.yamlblock import YamlBlock
from timApp.timdb.exceptions import TimDbException
@attr.s
class ReplaceArguments(DryrunnableOnly, SearchArgumentsBasic):
    """Options describing what to do with each search match.

    Both fields are keyword-only and default to None; a None field means
    that the corresponding operation is not requested.
    """

    # Replacement string applied to the matched text.
    to: str | None = attr.ib(default=None, kw_only=True)
    # Attribute to add to matching paragraphs, written as "name=value".
    add_attr: str | None = attr.ib(default=None, kw_only=True)
@attr.s
class ReplaceArgumentsCLI(ReplaceArguments, BasicArguments):
    """Replacement arguments as supplied from the command line.

    Combines :class:`ReplaceArguments` with :class:`BasicArguments`
    without adding any fields of its own.
    """
def min_replacement_length(x: str) -> str:
    """Return ``x`` unchanged if it is long enough to be a search string.

    :param x: The candidate string to replace.
    :return: ``x`` itself when it has at least 3 characters.
    :raises ArgumentTypeError: if ``x`` is shorter than 3 characters.
    """
    if len(x) >= 3:
        return x
    raise ArgumentTypeError("String to replace must be at least 3 characters.")
@attr.s
class ReplacementResult:
    """Represents a single replacement in a :class:`DocParagraph`."""

    search_result: SearchResult = attr.ib(kw_only=True)
    replacement: str = attr.ib(kw_only=True)
    # Reason why the replacement should not be applied; None when there is no
    # error (the default), hence the optional type.
    error: str | None = attr.ib(kw_only=True, default=None)

    def get_new_markdown(self) -> str:
        """Gets the new markdown after applying the replacement string.

        NOTE: This replaces ALL occurrences currently.

        :return: The string that was searched, with every match of the
            search pattern substituted by ``self.replacement``.
        """
        match = self.search_result.match_pattern
        # count=0 means "no limit": every occurrence in the searched string
        # is replaced, not just the one this result represents.
        return match.re.sub(self.replacement, match.string, count=0)

    def get_replacement(self) -> tuple[str, str]:
        """Gets the matched text and what it would be replaced with.

        :return: A pair ``(matched_text, replacement_text)`` where the
            replacement text has its group references expanded for this match.
        """
        match = self.search_result.match_pattern
        return match.group(0), match.expand(self.replacement)

    def format_match(self, args: ReplaceArgumentsCLI) -> str:
        """Formats this replacement using the format string in ``args.format``.

        The format string receives the whole match and its groups as
        positional arguments, and ``doc_id``, ``par_id``, ``url`` and ``to``
        (the expanded replacement text) as keyword arguments.

        :param args: The command-line arguments providing the format string.
        :return: The formatted description of this replacement.
        """
        r = self.search_result
        m = r.match_pattern
        return args.format.format(
            m.group(0),
            *m.groups(),
            doc_id=r.doc.id,
            par_id=r.par.get_id(),
            url=get_url_for_match(args, r.doc, r.par),
            to=self.get_replacement()[1],
        )
@dataclass
class AttrModification:
    """Describes an attribute change made to a matched paragraph."""

    search_result: SearchResult
    par: DocParagraph
    name: str
    value: str
    # True if the paragraph already had this attribute with this value.
    was_already: bool
    # Only "add" is currently supported by format_match.
    mod_type: str = "add"

    @property
    def error(self) -> str | None:
        """Always None; an attribute modification never reports an error."""
        return None

    def format_match(self, args: ReplaceArgumentsCLI) -> str:
        """Describes the modification as a single line of text.

        :param args: The command-line arguments, used for building the URL.
        :return: A string of the form ``<url>: <added|existing> attribute name="value"``.
        :raises NotImplementedError: if ``mod_type`` is anything other than "add".
        """
        result = self.search_result
        match_url = get_url_for_match(args, result.doc, result.par)
        if self.mod_type != "add":
            raise NotImplementedError
        status = "existing" if self.was_already else "added"
        return f'{match_url}: {status} attribute {self.name}="{self.value}"'
def perform_replace(
    d: DocInfo,
    args: ReplaceArguments,
) -> Generator[ReplacementResult | AttrModification, None, None]:
    """Performs a search-and-replace operation for the specified document, yielding
    :class:`ReplacementResult` and/or :class:`AttrModification` objects.

    For each search match, a :class:`ReplacementResult` is yielded if ``args.to``
    is given and an :class:`AttrModification` is yielded if ``args.add_attr`` is
    given. The paragraph is saved only after its results have been yielded, so
    the generator must be consumed for the changes to take effect.

    :param d: The document to process.
    :param args: The replacement arguments. If args.dryrun is True, no actual replacement will occur.
    :raises Exception: if a match is found but neither ``args.to`` nor
        ``args.add_attr`` is given.
    :raises ValueError: if ``args.add_attr`` does not contain ``=``.
    """
    for r in search(d, args, use_exported=False):
        # Everything yielded for this match; an error in any of them prevents saving.
        results: list[ReplacementResult | AttrModification] = []
        old_md = None
        new_md = None
        if args.to is not None:
            replacement = ReplacementResult(search_result=r, replacement=args.to)
            old_md = r.par.get_markdown()
            new_md = replacement.get_new_markdown()
            if r.par.is_yaml():
                # Macro info is only needed for expanding YAML paragraphs.
                mi = r.par.doc.get_settings().get_macroinfo(default_view_ctx)
                # The parsed blocks are not needed; parsing is done only to
                # verify that the YAML is valid both before and after.
                try:
                    YamlBlock.from_markdown(r.par.get_expanded_markdown(mi))
                except YAMLError:
                    replacement.error = (
                        "YAML is invalid before replacement, so not doing anything"
                    )
                except TimDbException as e:
                    replacement.error = f"Exception: {e}"
                if not replacement.error:
                    try:
                        p_temp = r.par.clone()
                        p_temp.set_markdown(new_md)
                        YamlBlock.from_markdown(p_temp.get_expanded_markdown(mi))
                    except YAMLError:
                        replacement.error = "YAML would be invalid after replacement, so not doing anything"
                    except TimDbException as e:
                        # Handled the same way as in the pre-replacement check.
                        replacement.error = f"Exception: {e}"
            results.append(replacement)
            yield replacement
        old_attrs = copy(r.par.get_attrs())
        if args.add_attr is not None:
            # Split only at the first "=" so that the value may contain "=" too.
            attr_name, attr_value = args.add_attr.split("=", 1)
            modification = AttrModification(
                search_result=r,
                par=r.par,
                name=attr_name,
                value=attr_value,
                was_already=old_attrs.get(attr_name) == attr_value,
            )
            results.append(modification)
            yield modification
            r.par.set_attr(attr_name, attr_value)
        if not results:
            raise Exception("--to or --add-attr must be given")
        # Errors are read after the yields and from every result, so a failed
        # replacement is not masked by a subsequent attribute modification.
        if args.dryrun or any(res.error for res in results):
            continue
        # The method get_new_markdown replaces all occurrences in a paragraph,
        # so some ReplacementResults are (in this sense) redundant.
        if old_md == new_md and old_attrs == r.par.get_attrs():
            continue
        if new_md is not None:
            r.par.set_markdown(new_md)
        # TODO currently DocParagraph.store doesn't take self.attrs into account; it only saves __data dict.
        #  So for now we just modify the dict reference directly.
        r.par.clear_cache()
        r.par.save()
def replace_and_print(d: DocInfo, args: ReplaceArgumentsCLI) -> int:
    """Runs :func:`perform_replace` and prints each result using its ``format_match``.

    If a result carries an error, the error is printed (followed by a colon)
    on its own line before the formatted match.

    :param d: The document to process.
    :param args: The command-line replacement arguments.
    :return: ``num_pars_found`` of the last yielded result's search result,
        or 0 if nothing was yielded.
    """
    found_count = 0
    for result in perform_replace(d, args):
        found_count = result.search_result.num_pars_found
        if result.error:
            print(result.error + ":")
        formatted = result.format_match(args)
        print(f"{formatted}")
    return found_count
if __name__ == "__main__":
    # Start from the shared search argument parser, in non-read-only mode.
    arg_parser = create_basic_search_argparser(
        "Replaces strings in documents",
        is_readonly=False,
    )
    # Exactly one of --to / --add-attr has to be given on the command line.
    action_group = arg_parser.add_mutually_exclusive_group(required=True)
    action_group.add_argument(
        "--to",
        help=r"The replacement string. Use \1, \2, ... for referencing regex groups.",
    )
    action_group.add_argument(
        "--add-attr",
        help="An attribute to add to the matching paragraphs.",
    )
    process_items(replace_and_print, arg_parser)