x3x3x3x_5h3ll
— 53cur3 — 5h3ll_1d —
Linux vps-10654784.cedaps.org.br 3.10.0-1160.119.1.el7.x86_64 #1 SMP Tue Jun 4 14:43:51 UTC 2024 x86_64
  INFO SERVER : Apache PHP : 7.4.33
/lib64/python3.6/site-packages/lxml/html/
162.240.100.168

 
[ NAME ] [ SIZE ] [ PERM ] [ DATE ] [ ACTN ]
+FILE +DIR
__pycache__ dir drwxr-xr-x 2024-05-05 22:43 R D
ElementSoup.py 0.312 KB -rw-r--r-- 2011-09-25 16:58 R E G D
__init__.py 63.368 KB -rw-r--r-- 2018-03-13 20:13 R E G D
_diffcommand.py 2.085 KB -rw-r--r-- 2017-09-17 07:43 R E G D
_html5builder.py 3.17 KB -rw-r--r-- 2012-09-28 19:13 R E G D
_setmixin.py 1.085 KB -rw-r--r-- 2016-05-05 07:08 R E G D
builder.py 4.209 KB -rw-r--r-- 2011-09-25 16:58 R E G D
clean.cpython-36m-x86_64-linux-gnu.so 312.953 KB -rwxr-xr-x 2022-05-10 20:38 R E G D
clean.py 26.501 KB -rw-r--r-- 2022-05-10 20:37 R E G D
defs.py 4.114 KB -rw-r--r-- 2022-05-10 20:37 R E G D
diff.cpython-36m-x86_64-linux-gnu.so 419.563 KB -rwxr-xr-x 2022-05-10 20:38 R E G D
diff.py 29.786 KB -rw-r--r-- 2017-05-01 14:11 R E G D
formfill.py 9.462 KB -rw-r--r-- 2017-06-03 16:47 R E G D
html5parser.py 8.432 KB -rw-r--r-- 2017-09-17 07:43 R E G D
soupparser.py 9.964 KB -rw-r--r-- 2017-09-17 07:43 R E G D
usedoctest.py 0.243 KB -rw-r--r-- 2011-09-25 16:58 R E G D
REQUEST EXIT
import difflib from lxml import etree from lxml.html import fragment_fromstring import re __all__ = ['html_annotate', 'htmldiff'] try: from html import escape as html_escape except ImportError: from cgi import escape as html_escape try: _unicode = unicode except NameError: # Python 3 _unicode = str try: basestring except NameError: # Python 3 basestring = str ############################################################ ## Annotation ############################################################ def default_markup(text, version): return '%s' % ( html_escape(_unicode(version), 1), text) def html_annotate(doclist, markup=default_markup): """ doclist should be ordered from oldest to newest, like:: >>> version1 = 'Hello World' >>> version2 = 'Goodbye World' >>> print(html_annotate([(version1, 'version 1'), ... (version2, 'version 2')])) Goodbye World The documents must be *fragments* (str/UTF8 or unicode), not complete documents The markup argument is a function to markup the spans of words. This function is called like markup('Hello', 'version 2'), and returns HTML. The first argument is text and never includes any markup. The default uses a span with a title: >>> print(default_markup('Some Text', 'by Joe')) Some Text """ # The basic strategy we have is to split the documents up into # logical tokens (which are words with attached markup). We then # do diffs of each of the versions to track when a token first # appeared in the document; the annotation attached to the token # is the version where it first appeared. tokenlist = [tokenize_annotated(doc, version) for doc, version in doclist] cur_tokens = tokenlist[0] for tokens in tokenlist[1:]: html_annotate_merge_annotations(cur_tokens, tokens) cur_tokens = tokens # After we've tracked all the tokens, we can combine spans of text # that are adjacent and have the same annotation cur_tokens = compress_tokens(cur_tokens) # And finally add markup result = markup_serialize_tokens(cur_tokens, markup) return ''.join(result).strip() def tokenize_annotated(doc, annotation): """Tokenize a document and add an annotation attribute to each token """ tokens = tokenize(doc, include_hrefs=False) for tok in tokens: tok.annotation = annotation return tokens def html_annotate_merge_annotations(tokens_old, tokens_new): """Merge the annotations from tokens_old into tokens_new, when the tokens in the new document already existed in the old document. """ s = InsensitiveSequenceMatcher(a=tokens_old, b=tokens_new) commands = s.get_opcodes() for command, i1, i2, j1, j2 in commands: if command == 'equal': eq_old = tokens_old[i1:i2] eq_new = tokens_new[j1:j2] copy_annotations(eq_old, eq_new) def copy_annotations(src, dest): """ Copy annotations from the tokens listed in src to the tokens in dest """ assert len(src) == len(dest) for src_tok, dest_tok in zip(src, dest): dest_tok.annotation = src_tok.annotation def compress_tokens(tokens): """ Combine adjacent tokens when there is no HTML between the tokens, and they share an annotation """ result = [tokens[0]] for tok in tokens[1:]: if (not result[-1].post_tags and not tok.pre_tags and result[-1].annotation == tok.annotation): compress_merge_back(result, tok) else: result.append(tok) return result def compress_merge_back(tokens, tok): """ Merge tok into the last element of tokens (modifying the list of tokens in-place). """ last = tokens[-1] if type(last) is not token or type(tok) is not token: tokens.append(tok) else: text = _unicode(last) if last.trailing_whitespace: text += last.trailing_whitespace text += tok merged = token(text, pre_tags=last.pre_tags, post_tags=tok.post_tags, trailing_whitespace=tok.trailing_whitespace) merged.annotation = last.annotation tokens[-1] = merged def markup_serialize_tokens(tokens, markup_func): """ Serialize the list of tokens into a list of text chunks, calling markup_func around text to add annotations. """ for token in tokens: for pre in token.pre_tags: yield pre html = token.html() html = markup_func(html, token.annotation) if token.trailing_whitespace: html += token.trailing_whitespace yield html for post in token.post_tags: yield post ############################################################ ## HTML Diffs ############################################################ def htmldiff(old_html, new_html): ## FIXME: this should take parsed documents too, and use their body ## or other content. """ Do a diff of the old and new document. The documents are HTML *fragments* (str/UTF8 or unicode), they are not complete documents (i.e., no tag). Returns HTML with and