import regex


class QuestionsParser:
    """Load questions from a PDF, look each one up on Google and export the results."""

    def __init__(self, path_to_pdf, remainder_mod: tuple = (0, 1)):
        """
        Load the questions from the given PDF and prepare the HTTP scraper.

        :param path_to_pdf: Path to the pdf file containing the questions
        :param remainder_mod: Tuple ``(remainder, divisor)``. Only questions whose 1-based
            number ``n`` satisfies ``n % divisor == remainder`` are looked up; the default
            ``(0, 1)`` selects every question.
        :raises TypeError: If ``_validate_path`` rejects the type of the path
        :raises ValueError: If ``_validate_path`` rejects the path or the PDF cannot be parsed
        """
        import cloudscraper

        # _validate_path raises on any invalid input, so reaching the next line means the path passed.
        _validate_path(path_to_pdf)
        self.__question_list = _parse_questions(path_to_pdf)

        self.__remainder_mod = remainder_mod
        self.__scraper = cloudscraper.create_scraper()
        self.__result = None

    def parse_answers_google(self):
        """
        Query Google for every selected question and store the findings on the instance.

        For each selected question a ``"<number>. <question>"`` key is stored whose value is a
        dictionary with two keys: ``"link"`` (the href of the first matching result link) and
        ``"answer"`` (the text of the answer snippet, or ``"No answer found"`` when the page
        has none). The collected dictionary is consumed by :meth:`write_to_file`.

        :return: This QuestionsParser object, so calls can be chained
        :raises AttributeError: If no result link can be found in the response
        """
        import bs4
        from QuestionsParser.utils.progress_bar import progress_bar

        remainder, divisor = self.__remainder_mod[0], self.__remainder_mod[1]
        result = {}

        for number, question in enumerate(progress_bar(self.__question_list), start=1):
            if number % divisor != remainder:
                continue

            question_str = f"{number}. {question}"
            query = question.replace("?", "") + " in Java?"

            # Pass the query via ``params`` so characters such as '&', '#' or '+'
            # are URL-encoded instead of corrupting the query string.
            response = self.__scraper.get("https://www.google.com/search", params={"q": query})
            soup = bs4.BeautifulSoup(response.text, "html.parser")

            # NOTE(review): "yuRUbf" / "hgKElc" are presumably Google's CSS classes for the
            # result-link container and the answer snippet - confirm they are still current.
            try:
                link = soup.find("div", {"class": "yuRUbf"}).find("a")["href"]
            except (AttributeError, TypeError) as err:
                # A lookup returned None, i.e. the expected result markup is missing.
                raise AttributeError("Too many requests. Please try again later") from err

            answer_tag = soup.find("span", {"class": "hgKElc"})
            answer = answer_tag.text if answer_tag is not None else "No answer found"

            result[question_str] = {"link": link, "answer": answer}

        self.__result = result
        return self

    def write_to_file(self, path_to_file):
        """
        Write the result to a file. The file will be overwritten if it already exists.
        Note: The only file extensions supported are .pdf and .docx. If possible, use .docx, as it is more readable
        and convenient.

        :param path_to_file: Destination path; must end in ``.pdf`` or ``.docx``
        :raises ValueError: If parse_answers_google() has not been called yet, or if the
            path does not end in a supported extension
        """
        if self.__result is None:
            raise ValueError("No result found. Please call parse_answers_google() first")

        # Compare the real suffix: a prefix match would send "x.pdf.docx" or "x.pdfx"
        # to the PDF writer.
        if path_to_file.endswith(".pdf"):
            _write_to_pdf(path_to_file, self.__result)
        elif path_to_file.endswith(".docx"):
            _write_to_docx(path_to_file, self.__result)
        else:
            raise ValueError("Only .pdf and .docx files are supported")

    def get_questions(self):
        """
        Return the list of questions parsed from the PDF.
        """
        return self.__question_list


def _validate_path(path_to_pdf) -> bool:
    """
    Checks that the path to a pdf file is valid.
    If any of these tests fail, _validate_path raises an error.

    :param path_to_pdf: Check if the path is a valid pdf file
    :return: A boolean value
    """
    if not isinstance(path_to_pdf, str):
        raise TypeError("Path to pdf must be a string")
    if not regex.match(r".*\.pdf$", path_to_pdf):
        raise ValueError("Path to pdf must be a valid path to a pdf file")
    if not regex.match(r"^.*/", path_to_pdf):
        raise ValueError("Path to pdf must be an absolute path")
    return True


def _parse_questions(path_to_pdf) -> list[str]:
    """
    Extract the questions from the first page of a pdf file.

    The text of page 0 is split on newlines; lines that consist only of a
    question number (digits followed by a dot, e.g. ``"12."``) are dropped and
    every other line is returned as one question.

    :param path_to_pdf: Specify the path to the pdf file containing all the questions
    :return: A list of strings, one per remaining line of the first page
    :raises ValueError: If the file cannot be read as a PDF (the original error is chained)
    """
    import PyPDF2

    # The context manager closes the file even when extraction fails.
    with open(path_to_pdf, "rb") as questions_file:
        try:
            questions = PyPDF2.PdfFileReader(questions_file).getPage(0).extractText()
        except Exception as e:
            # Chain the cause so the underlying parser error stays visible in the traceback.
            raise ValueError("Invalid PDF file") from e

    return [
        question
        for question in questions.split("\n")
        if not regex.match(r"^\d+\.\s*$", question)
    ]


def _write_to_pdf(path_to_file, result):
    """
    Render the result dictionary into a pdf file, overwriting any existing file.

    Each entry produces a bold heading with the question, followed by the link in
    italics (when present) and the answer text (when present).

    :param path_to_file: Destination path of the pdf
    :param result: Mapping of question text to a dict with optional "link" and "answer" entries
    """
    from fpdf import FPDF

    def latin1_safe(text):
        # Characters outside Latin-1 are replaced with "?".
        # NOTE(review): presumably needed because the Arial core font is Latin-1 only - confirm.
        return text.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for heading, entry in result.items():
        heading = latin1_safe(heading)
        url = entry.get("link")
        body = entry.get("answer")

        # Question heading in bold, then back to the regular body font.
        pdf.set_font("Arial", "B", size=13)
        pdf.cell(200, 10, txt=heading, ln=1, align="L")
        pdf.set_font("Arial", size=12)

        if url:
            # Link in italics, then back to the regular body font.
            pdf.set_font("Arial", "I", size=12)
            pdf.multi_cell(200, 10, txt=latin1_safe(url), align="L")
            pdf.set_font("Arial", size=12)

        if body:
            pdf.multi_cell(200, 10, txt=latin1_safe(body), align="L")

    pdf.output(path_to_file)
    print(f"Successfully wrote to {path_to_file}")


def _write_to_docx(path_to_file, result):
    """
    Render the result dictionary into a docx file, overwriting any existing file.

    The document starts with an "Answers" title. Each entry adds a level-1 heading
    with the question, a "Link: <url>" paragraph (when a link is present) and the
    answer text. The placeholder answer "No answer found" is rendered as a red
    hint pointing the reader to the link instead.

    :param path_to_file: Destination path of the docx
    :param result: Mapping of question text to a dict with optional "link" and "answer" entries
    """
    from docx import Document
    from docx.shared import RGBColor

    doc = Document()
    doc.add_heading("Answers", 0)

    for heading, entry in result.items():
        url, text = entry.get("link"), entry.get("answer")

        doc.add_heading(heading, 1)

        if url:
            paragraph = doc.add_paragraph()
            label = paragraph.add_run("Link: ")
            label.bold = True
            paragraph.add_run(url)

        if not text:
            continue

        if text != "No answer found":
            doc.add_paragraph(text)
            continue

        # Placeholder answer: show a red hint instead of the raw placeholder text.
        warning = doc.add_paragraph().add_run("No answer found. Please check the link above.")
        warning.font.color.rgb = RGBColor(255, 0, 0)

    doc.save(path_to_file)
    print(f"Successfully wrote to {path_to_file}")
