"""
Faraday Penetration Test IDE
Copyright (C) 2013  Infobyte LLC (http://www.infobytesec.com/)
See the file 'doc/LICENSE' for the license information

"""
import binascii
import re
import base64
import distutils.util  # pylint: disable=import-error
from urllib.parse import urlsplit
import lxml.etree as ET

from bs4 import BeautifulSoup, Comment

from faraday_plugins.plugins.plugin import PluginXMLFormat
from faraday_plugins.plugins.plugins_utils import CVE_regex, CWE_regex

__author__ = "Francisco Amato"
__copyright__ = "Copyright (c) 2013, Infobyte LLC"
__credits__ = ["Francisco Amato", "Micaela Ranea Sanchez", "Dante Acosta"]
__license__ = ""
__version__ = "1.1.0"
__maintainer__ = "Francisco Amato"
__email__ = "famato@infobytesec.com"
__status__ = "Development"


class BurpXmlParser:
    """
    Parses the xml report produced by the burp tool and exposes every
    reported issue as an Item in `self.items`.

    TODO: Handle errors.
    TODO: Test burp output version. Handle what happens if the parser doesn't support it.
    TODO: Test cases.

    @param xml_output The xml content generated by burp (not a file path)
    """

    def __init__(self, xml_output):
        self.target = None
        self.port = "80"
        self.host = None

        root = self.parse_xml(xml_output)
        # No usable tree means there is nothing to report.
        self.items = [] if root is None else list(self.get_items(root))

    def parse_xml(self, xml_output):
        """
        Parse the xml content into an element tree.

        The parser is created with recover=True and huge_tree=True.

        TODO: Write custom parser to just read the nodes that we need instead of
        reading the whole file.

        @return xml_tree The root element. None if a syntax error was raised.
        """
        try:
            lenient_parser = ET.XMLParser(recover=True, huge_tree=True)
            return ET.fromstring(xml_output, parser=lenient_parser)
        except ET.XMLSyntaxError as err:
            print(f"XMLSyntaxError: {err}. {xml_output}")
            return None

    def get_items(self, tree):
        """
        Generator over the direct `issue` children of the tree.

        @return items One Item instance per issue node
        """
        yield from map(Item, tree.findall('issue'))


def get_attrib_from_subnode(xml_node, subnode_xpath_expr, attrib_name):
    """
    Looks up the first subnode matching the expression and reads one of
    its attributes.

    @return The attribute value, or None when either the subnode or the
            attribute does not exist
    """
    subnode = xml_node.find(subnode_xpath_expr)
    return None if subnode is None else subnode.get(attrib_name)


class Item:
    """
    A single issue node of a burp xml tree, flattened into attributes.

    Child nodes that are absent or empty are tolerated instead of raising:
    plain text fields (name, path, severity, external_id) are then None,
    while location, request and response fall back to "".

    @param item_node A item_node taken from an burp xml tree
    """

    # Accepted spellings of the `base64` attribute (compared lower-cased).
    # These are the values distutils.util.strtobool accepts; they are checked
    # here directly because distutils was removed from the standard library
    # in Python 3.12 (PEP 632).
    _TRUE_FLAGS = frozenset(("y", "yes", "t", "true", "on", "1"))
    _FALSE_FLAGS = frozenset(("n", "no", "f", "false", "off", "0"))

    def __init__(self, item_node):
        self.node = item_node

        host_node = item_node.find('host')
        host_url = host_node.text if host_node is not None else None
        # urlsplit is always given a str; a missing/empty host yields empty parts.
        url_data = urlsplit(host_url or "")

        self.protocol = url_data.scheme
        self.host = url_data.hostname
        # Use the port in the URL if it is defined, or 80 or 443 by default
        self.port = url_data.port or (443 if url_data.scheme == "https" else 80)
        self.ip = host_node.get('ip') if host_node is not None else None
        # Read from the `url` attribute of the issue node itself.
        # NOTE(review): if burp never sets that attribute this is always None;
        # confirm whether the host text was the intended value.
        self.url = item_node.get('url')

        self.name = self.get_text_from_subnode('name')
        self.path = self.get_text_from_subnode('path')
        self.severity = self.get_text_from_subnode('severity')
        self.external_id = self.get_text_from_subnode('type')

        # Strip the path out of the location, then keep the first word found
        # between square brackets (e.g. "/a [q parameter]" -> "q").
        location = self.get_text_from_subnode('location') or ""
        loc = re.search(r"(?<=\[).+?(?=\])", location.replace(self.path or "", ""))
        self.location = loc.group().split(" ")[0] if loc else ""

        self.request = self.decode_binary_node(item_node.find('./requestresponse/request'))
        self.response = self.decode_binary_node(item_node.find('./requestresponse/response'))
        self.detail = self.do_clean(item_node.find('issueDetail'))
        self.remediation = self.do_clean(item_node.find('remediationBackground'))
        self.background = self.do_clean(item_node.find('issueBackground'))
        self.references = self.do_clean(item_node.find('references'))
        self.vuln_class = self.do_clean(item_node.find('vulnerabilityClassifications'))

        # Only the first CVE identifier found in the background text is kept.
        self.cve = []
        if self.background:
            cve = CVE_regex.search(self.background)
            if cve:
                self.cve = [cve.group()]

    @staticmethod
    def do_clean(value):
        """
        Returns the text of a node.

        @return "" when the node is None, otherwise the node's text (which
                is None for a node without text)
        """
        if value is None:
            return ""
        return value.text

    @classmethod
    def _flag_is_true(cls, value):
        """
        Interprets a textual boolean flag.

        @return True or False for the recognised spellings
        @raise ValueError when the value is not a recognised spelling
        """
        normalized = value.lower()
        if normalized in cls._TRUE_FLAGS:
            return True
        if normalized in cls._FALSE_FLAGS:
            return False
        raise ValueError(f"invalid truth value {normalized!r}")

    def decode_binary_node(self, node):
        """
        Returns the text of a request/response node, base64-decoding it
        when the node's `base64` attribute is a true flag.

        Characters with a code point above 128 are dropped from the result.

        @param node The node to read, or None
        @return The (decoded) text; "" when the node is None or has no text
        @raise ValueError when the `base64` attribute is not a boolean flag
        """
        if node is None:
            return ""

        text = node.text or ""
        if self._flag_is_true(node.get('base64', 'false')):
            # Restore any padding that was cut off so complete quads decode.
            text += "=" * (-len(text) % 4)
            try:
                res = base64.b64decode(text, validate=False).decode('utf-8', errors="backslashreplace")
            except binascii.Error:
                res = "Truncated Base64 data: unable to decode. Raw content:\n" + text
        else:
            res = text
        return "".join(ch for ch in res if ord(ch) <= 128)

    def get_text_from_subnode(self, subnode_xpath_expr):
        """
        Finds a subnode in the item node and retrieves its text.

        @return The subnode's text, or None when the subnode does not exist
        """
        sub_node = self.node.find(subnode_xpath_expr)
        if sub_node is not None:
            return sub_node.text

        return None


class BurpPlugin(PluginXMLFormat):
    """
    Plugin that turns the issues of a burp xml report into hosts, services
    and web vulnerabilities.
    """

    def __init__(self, *arg, **kwargs):
        super().__init__(*arg, **kwargs)
        self.identifier_tag = "issues"
        self.id = "Burp"
        self.name = "Burp XML Output Plugin"
        self.plugin_version = "0.0.2"
        self.version = "1.6.05 BurpPro"
        self.framework_version = "1.0.0"
        self.options = None
        self._current_output = None
        self.target = None

    def parseOutputString(self, output):
        """
        Parses the burp xml output and, for every issue found, creates the
        host, its service and one web vulnerability attached to that service.

        @param output The xml content generated by burp
        """
        parser = BurpXmlParser(output)
        for item in parser.items:

            h_id = self.createAndAddHost(item.ip, hostnames=[item.host])
            s_id = self.createAndAddServiceToHost(
                h_id,
                item.protocol,
                "tcp",
                ports=[str(item.port)],
                status="open")

            desc = self.removeHtml(item.background) if item.background else ""
            data = self.removeHtml(item.detail) if item.detail else ""
            resolution = self.removeHtml(item.remediation) if item.remediation else ""
            ref = self.get_url(item.references) if item.references else []

            cwe = []
            if item.vuln_class:
                for cwe_ref in self.get_ref(item.vuln_class):
                    # Search once and reuse the match.
                    cwe_match = CWE_regex.search(cwe_ref)
                    if cwe_match:
                        cwe.append(cwe_match.group())

            self.createAndAddVulnWebToService(
                h_id,
                s_id,
                item.name,
                desc=desc,
                data=data,
                severity=item.severity,
                website=item.host,
                path=item.path,
                request=item.request,
                response=item.response,
                resolution=resolution,
                ref=ref,
                params=item.location,
                external_id=item.external_id,
                cve=item.cve,
                cwe=cwe
            )

    def removeHtml(self, markup):
        """
        Converts an HTML fragment to plain text.

        Line breaks and paragraphs become new lines, list entries become
        "* " lines, every other tag is dropped while its content is kept,
        and comments are removed.

        @param markup The HTML text to convert
        @return The resulting plain text
        """
        soup = BeautifulSoup(markup, "html.parser")

        # Replace line breaks and paragraphs for new lines
        for tag in soup.find_all(["br", "p"]):
            tag.append("\n")
            tag.unwrap()

        # Replace lists for * and new lines
        for tag in soup.find_all(["ul", "ol"]):
            for item in tag.find_all("li"):
                item.insert_before("* ")
                item.append("\n")
                item.unwrap()
            tag.unwrap()

        # Remove all other HTML tags
        for tag in soup.find_all():
            tag.unwrap()

        # Remove all comments. Every tag has been unwrapped by now, so the
        # comments are direct children of the soup. They are collected first
        # because extracting while iterating soup.children skips the node
        # that follows each removed comment.
        comments = [child for child in soup.children if isinstance(child, Comment)]
        for comment in comments:
            comment.extract()

        return str(soup)

    def get_ref(self, markup):
        """
        Flattens an HTML list of (linked) entries into plain text lines.

        The <ul>, <li> and nested <a> tags are unwrapped and the remaining
        text is split on new lines.

        @param markup The HTML text to convert
        @return A list with one string per line of the flattened text
        """
        soup = BeautifulSoup(markup, "html.parser")

        for tag in soup.find_all("ul"):
            for item in tag.find_all("li"):
                for a in item.find_all("a"):
                    a.unwrap()
                item.unwrap()
            tag.unwrap()
        ref = str(soup).strip().split("\n")
        return ref

    def get_url(self, markup):
        """
        Collects the link targets found inside the <ul>/<li> entries of an
        HTML fragment.

        @param markup The HTML text to inspect
        @return A list with the stripped href of every such link
        """
        soup = BeautifulSoup(markup, "html.parser")
        ref = []
        for tag in soup.find_all("ul"):
            for item in tag.find_all("li"):
                # href=True skips anchors without a target, which would
                # otherwise raise KeyError on a['href'].
                for a in item.find_all("a", href=True):
                    ref.append(a['href'].strip())
        return ref


def createPlugin(*args, **kwargs):
    """
    Factory for the plugin: builds a BurpPlugin, forwarding every
    positional and keyword argument unchanged.

    @return The new BurpPlugin instance
    """
    plugin = BurpPlugin(*args, **kwargs)
    return plugin
