# -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import re
import os
import sys
import tempfile
from subprocess import call
import operator
import itertools
import hashlib

from colorama import init, Style, Fore


# Number of unmodified lines shown above and below each comment
NUM_CONTEXT_LINES = 5
# Comment blocks are aligned to multiples of this many spaces
TAB_WIDTH = 4

# Matches /* */ style comments
comment_block_re = re.compile(r"^[ \t]*/\*.+?\*/", re.DOTALL | re.MULTILINE)
# Matches // style comments (a run of consecutive // lines is one match)
single_comment_re = re.compile(r"^([ \t]*//[^\n\r]*)(\n[ \t]*//[^\n\r]*)*", re.MULTILINE)
# Matches assorted ASCII art at least 3 characters long
ascii_re = re.compile(r"[-\*+=#]{3,}")
# Attempts to extract content out of comments featuring garbage symbols
content_re = re.compile(r"([^-\s\|\\/\*+=#].+?)[\|\\/\*=#]*$")

# Words that carry no information when they are all a comment's first line says.
# Every plural/inflected suffix is optional so that the bare word matches too.
keyword_re_base = "(class(es)?|constructor[s]?|declaration[s]?|define[s]?|destructor[s]?|enum[s]?|export(s|ed)?|" \
                  "extern[s]?|factor(y|ies)|forward|friend[s]?|function[s]?|implementation[s]?|inline[s]?|" \
                  "interface[s]?|internal[s]?|method[s]?|namespace[s]?|operator[s]?|private|propert(y|ies)|" \
                  "protected|public|static|struct[s]?|typedef[s]?|virtual)"


class Comment:
    """One comment found in a source file, plus the lines that surround it.

    Attributes set by the constructor:
        match               -- the regex match object that found the comment
        lines               -- the file lines from start to end (inclusive)
        start, end          -- indexes into the file's line list
        multiline           -- flag supplied by the caller
        min_whitespace_len  -- smallest leading width of any comment line,
                               rounded down to a multiple of TAB_WIDTH
        content_lines       -- result of extract_comment_content() for this comment

    The context attributes (before, after, before_len, after_len,
    before_start, after_end) stay None until fill_context() is called.
    """

    def __init__(self, lines, match, start, end, multiline):
        # A start beyond the end is pulled back onto the end line
        first = min(start, end)

        self.match = match
        self.multiline = multiline
        self.start = first
        self.end = end
        self.lines = lines[first:end + 1]

        # Context is filled in later by fill_context()
        self.before = None
        self.after = None
        self.before_len = None
        self.after_len = None
        self.before_start = None
        self.after_end = None

        # Find the narrowest leading width among the comment's lines
        widths = [get_leading_space_len(text) for text in self.lines]
        narrowest = min(widths) if widths else sys.maxint

        # The comment block as a whole is aligned down to a TAB_WIDTH boundary
        self.min_whitespace_len = TAB_WIDTH * (narrowest // TAB_WIDTH)
        self.content_lines = extract_comment_content(self)

    def fill_context(self, lines):
        """Capture up to NUM_CONTEXT_LINES lines on each side of the comment.

        lines is the full list of file lines; the context is clipped at the
        beginning and end of that list.
        """
        lines_above = min(NUM_CONTEXT_LINES, self.start)
        lines_below = min(NUM_CONTEXT_LINES, len(lines) - self.end - 1)

        self.before_len = lines_above
        self.after_len = lines_below
        self.before_start = self.start - lines_above
        self.after_end = self.end + lines_below

        self.before = lines[self.before_start:self.start]
        self.after = lines[self.end + 1:self.after_end + 1]

    @property
    def raw_content(self):
        """The exact text matched by the comment regex."""
        return self.match.group(0)


def display_lines(before, content, after, start_line, color):
    """Print a numbered listing of a comment with its surrounding context.

    before / after are the context lines and are printed dimmed; content is
    printed brightly in the given colorama foreground color.  Numbering starts
    at start_line for the first line of before and continues through content
    and after.  A blank line is printed after the listing.
    """
    def context_rows(rows, first_number):
        # Dim line number, then the line in the default foreground color
        return [Style.DIM + "%4d: " % (first_number + idx) + Fore.RESET + text + Style.RESET_ALL
                for idx, text in enumerate(rows)]

    content_first = start_line + len(before)
    after_first = content_first + len(content)

    rendered = context_rows(before, start_line)
    rendered.extend(
        color + Style.DIM + "%4d: " % (content_first + idx) + Style.BRIGHT + text + Style.RESET_ALL + Fore.RESET
        for idx, text in enumerate(content))
    rendered.extend(context_rows(after, after_first))

    print("\n".join(rendered) + "\n")


def display_commands():
    """Print the list of interactive commands in bright magenta."""
    menu = [
        "t - take best guess",
        "d - delete entire block",
        "f - finish up with this file",
        "i - ignore (or press ENTER)",
        "q - quit, discarding pending changes",
        "v - edit best guess",
        "V - edit original",
    ]
    # Color is switched on with the first entry and reset with the last one
    menu[0] = Fore.MAGENTA + Style.BRIGHT + menu[0]
    menu[-1] = menu[-1] + Fore.RESET + Style.RESET_ALL

    for entry in menu:
        print(entry)


def extract_comments(contents):
    """Find every /* */ block and every run of // lines in contents.

    Returns a list of Comment objects ordered by start line, last comment in
    the file first.
    """
    lines = contents.splitlines()
    found = []

    # Search for comment blocks first, then // style comments
    for pattern in (comment_block_re, single_comment_re):
        for match in pattern.finditer(contents):
            # Begin one line past the line on which the match starts ...
            first_line = contents.count("\n", 0, match.start()) + 1

            # ... and step back when that line is exactly the first line of the match text
            if first_line > 0 and lines[first_line - 1] == match.group(0).splitlines()[0]:
                first_line -= 1

            last_line = contents.count("\n", 0, match.end())
            found.append(Comment(lines, match, first_line, last_line, pattern is comment_block_re))

    found.sort(key=operator.attrgetter("start"), reverse=True)
    return found


def extract_comment_content(comment):
    """Pull the textual content out of comment.lines.

    Returns a list of (leading_spaces, text) tuples.  A line made up solely of
    decoration characters contributes an empty ("", "") entry once some text
    has been seen, unless it is the comment's last line.  The result is passed
    through trim_newlines() in reverse with leading blanks disallowed.
    """
    decoration = "\\/-+=#*!|_"
    last_index = len(comment.lines) - 1

    extracted = []
    found_text = False
    for index, raw in enumerate(comment.lines):
        # Blank-ish line: nothing is left once decoration characters are removed
        if index < last_index and found_text and not raw.translate(None, decoration).strip():
            extracted.append(("", ""))

        # The same line may also carry real content
        hit = content_re.search(raw)
        if hit:
            found_text = True
            text = ascii_re.sub("", hit.group(1))
            extracted.append((" " * get_leading_space_len(raw), text))

    return trim_newlines(extracted, reverse=True, extractor=operator.itemgetter(1), allow_leading=False)


def filter_comments(comments):
    """Select the comments that are candidates for clean-up.

    A comment is skipped when it starts within the first lines of the file
    (start <= 10) and mentions "license", or when it contains the editor
    modeline text "tab-width: 4;" anywhere in the file.

    Of the remaining comments, those whose first content line is useless have
    that line removed from content_lines and are selected; otherwise a comment
    is selected when it contains ASCII art.  Each comment is returned at most
    once, in the order given.
    """
    selected = []
    for comment in comments:
        raw = comment.raw_content

        # Ignore license text at the start of the file, and editor modelines anywhere
        if (comment.start <= 10 and "license" in raw.lower()) or "tab-width: 4;" in raw:
            continue

        if is_first_line_useless(comment):
            comment.content_lines.pop(0)
            selected.append(comment)
        elif ascii_re.search(raw):
            # elif, not if: a comment matching both criteria must not be queued
            # twice, or it would be rewritten twice using stale line numbers
            selected.append(comment)

    return selected


def get_leading_space_len(line):
    """Return the width of the leading part of line that precedes its text.

    When the line contains at least one word and begins with optional
    whitespace followed by comment punctuation (/, \\, * or |), the width
    includes that punctuation and the whitespace after it.  Otherwise it is
    simply the amount of leading whitespace.
    """
    total = len(line)

    if re.findall(r"\b\w+\b", line):
        prefixed = re.match(r"^(\s*[/\\\*|]+\s*)(.+)", line)
        if prefixed is not None:
            remainder = prefixed.group(2)
            return total - len(remainder)

    stripped = line.lstrip()
    return total - len(stripped)


def is_first_line_useless(comment):
    """Decide whether the first content line of comment says nothing useful.

    Returns True when the line is a lone method or destructor name (e.g.
    "Method()" or "~Method()") in a comment with no indentation, or when
    removing the words in keyword_re_base leaves either nothing or a single
    word that is neither a bug ID nor a TODO/FIXME marker.  Returns False
    otherwise, including when the comment has no content lines.
    """
    # No content lines at all; the ASCII filter will probably catch these anyway
    if not comment.content_lines:
        return False

    word_pattern = r"\b\w+\b"
    first_text = comment.content_lines[0][1]
    word_total = len(re.findall(word_pattern, first_text))

    # A single word that looks like "Method()" or "~Method()" is only useless
    # outside a method body, which is judged by the comment's indentation
    if (word_total == 1
            and re.match(r"\s*(~\w+(\s*\(\s*\)\s*)?)|(\w+\s*\(\s*\))\s*", first_text)
            and comment.min_whitespace_len == 0):
        return True

    without_keywords = re.sub(r"\b%s\b" % keyword_re_base, "", first_text)
    remaining = re.findall(word_pattern, without_keywords)
    remaining_total = len(remaining)

    # Nothing was removed, so the line must mean something
    if remaining_total == word_total:
        return False

    # Only keywords were present
    if remaining_total == 0:
        return True

    if remaining_total != 1:
        return False

    survivor = remaining[0].lower()

    # Keep bug IDs (e.g. fdo12345)
    if re.match(r"\w+\d{2,}", survivor):
        return False

    # Keep TODOs and FIXMEs
    return survivor not in ["todo", "fixme", "fix"]


def overwrite_range(start, end, target, source):
    """Replace target[start..end] (inclusive) in place with the lines of source.

    Trailing whitespace is stripped from every inserted line.  An empty
    source simply deletes the range.  start and end are expected to be valid,
    non-negative indexes into target.
    """
    # One slice assignment instead of popping and inserting line by line,
    # which shifted the tail of the list once per line
    target[start:end + 1] = [line.rstrip() for line in source]


def reformat_comment(comment):
    """Build the suggested replacement lines for comment.

    Uses comment.content_lines, a list of (leading_spaces, text) tuples, and
    comment.min_whitespace_len.  With more than five entries, single blank
    entries between text are kept and everything else blank is dropped; with
    five or fewer, all blank entries are dropped.  If more than five entries
    remain the result is a /* ... */ block whose body lines keep their own
    leading spaces; otherwise it is one "// text" line per entry.
    """
    indent = " " * comment.min_whitespace_len
    entries = comment.content_lines

    if len(entries) <= 5:
        # Short comments lose their blank lines entirely
        kept = [entry for entry in entries if entry[1]]
    else:
        # Walk bottom-up: text is always kept, a blank entry only when the
        # entry kept just before it (the one below it) has text
        kept = []
        for entry in reversed(entries):
            if entry[1] or (kept and kept[-1][1]):
                kept.append(entry)
        kept.reverse()

    if len(kept) <= 5:
        return ["%s// %s" % (indent, entry[1]) for entry in kept]

    block = ["%s/*" % indent]
    block.extend("%s%s" % (entry[0], entry[1]) for entry in kept)
    block.append("%s*/" % indent)
    return block


def refine_whitespace(comment):
    """Condense blank lines in a comment's context and content, in place.

    comment.before, comment.content_lines and comment.after are each passed
    through trim_newlines(), and a blank line at the seam between two sections
    is dropped so that adjacent sections never meet with two blank lines.
    content_lines_len, before_len and after_len are updated to the new list
    lengths.

    before_start and after_end are deliberately left as fill_context() set
    them: they describe the range of file lines the context was read from, and
    that whole range has to be replaced when the edited context is written
    back.  Shrinking after_end to match the condensed list would leave the
    last original context lines in the file in addition to their copies in
    the replacement text.
    """
    content_text = operator.itemgetter(1)

    before = trim_newlines(comment.before, reverse=True)
    content = trim_newlines(trim_newlines(comment.content_lines, reverse=True, extractor=content_text),
                            extractor=content_text)
    after = trim_newlines(comment.after)

    # Condense adjacent newlines between each section
    if content:
        if before and not before[-1] and not content[0][1]:
            content.pop(0)
        # content is re-checked because the pop above may have emptied it
        if content and after and not content[-1][1] and not after[0]:
            content.pop(-1)
    elif before and after and not before[-1] and not after[0]:
        before.pop(-1)

    comment.before = before
    comment.content_lines = content
    comment.after = after

    # Recalculate the list lengths (but not the file range, see above)
    comment.content_lines_len = len(content)
    comment.before_len = len(before)
    comment.after_len = len(after)


def trim_newlines(lines, reverse=False, extractor=None, allow_leading=True):
    """Return a copy of lines with runs of blank lines condensed to one.

    lines          -- sequence of strings, or of arbitrary items when an
                      extractor is given
    reverse        -- scan from the last line to the first; the result is
                      still returned in the original order
    extractor      -- optional callable returning the text of an item
    allow_leading  -- when False, blank lines met before the first non-blank
                      line (in scan order) are dropped

    A line is blank when its text is empty after stripping whitespace.
    """
    kept = []
    previous_blank = False

    for line in (reversed(lines) if reverse else lines):
        text = extractor(line) if extractor else line
        is_blank = not text.strip()

        if not is_blank:
            keep = True
        elif kept:
            # Judge the previous line by its text, not by the truthiness of the
            # stored item: tuples and whitespace-only strings are always truthy
            keep = not previous_blank
        else:
            keep = allow_leading

        if keep:
            kept.append(line)
            previous_blank = is_blank

    if reverse:
        kept.reverse()
    return kept


def _read_command():
    """Prompt until a valid single-letter command is entered and return it.

    An empty line (just ENTER) means 'i', as the help text promises.  End of
    input on stdin is treated as 'q'.
    """
    valid = ('t', 'd', 'f', 'i', 'q', 'v', 'V')
    while True:
        sys.stdout.write(Fore.YELLOW + Style.BRIGHT + "Choice [d,f,i,t,q,v,V,?] ? " + Fore.RESET + Style.RESET_ALL)
        sys.stdout.flush()

        answer = sys.stdin.readline()
        if not answer:
            # readline() returns "" only at end of input
            return 'q'

        command = answer.strip()[:1] or 'i'
        if command == '?':
            display_commands()
        elif command in valid:
            return command


def _edit_in_vim(text_lines):
    """Let the user edit text_lines in vim and return the resulting lines."""
    with tempfile.NamedTemporaryFile(suffix=".tmp") as tmp:
        tmp.write("\n".join(text_lines))
        tmp.flush()
        call(["vim", tmp.name])
        tmp.file.seek(0)
        return tmp.read().splitlines()


def process_file(path, ignores_in):
    """Interactively clean up the comments of one source file.

    path        -- file to process
    ignores_in  -- list of MD5 hex digests of comments the user chose to
                   ignore in an earlier pass over this file; matching
                   comments are skipped without prompting and their digests
                   are removed from this list

    Returns (rescan_needed, ignores_out).  rescan_needed is True when an edit
    made in vim touched lines belonging to the next comment, so the file has
    to be processed again; ignores_out holds the digests of all comments
    ignored during this pass.  Changes are written back to path only if at
    least one comment was modified.  The 'q' command exits the program
    without writing.
    """
    ignores_out = []
    rescan_needed = False

    # Read everything up front so the file is closed before it is rewritten
    with open(path, "r") as file_obj:
        contents = file_obj.read()

    if not contents:
        # Callers unpack the result, so an empty file must return a tuple too
        return rescan_needed, ignores_out

    has_trailing_newline = contents.endswith('\n')
    lines = contents.splitlines()

    # Extract all comments
    comments = filter_comments(extract_comments(contents))
    ranges = [(comment.start, comment.end) for comment in comments]

    pending_saves = 0
    for comment in comments:
        if rescan_needed:
            break

        # Drop this comment's own range first, even if the comment is skipped
        # below, so that ranges[0] always refers to the next comment
        ranges.pop(0)
        next_range = ranges[0] if ranges else None

        # Skip comments ignored in an earlier pass, wherever their digest sits in the list
        digest = hashlib.md5(comment.raw_content).hexdigest()
        if digest in ignores_in:
            ignores_in.remove(digest)
            ignores_out.append(digest)
            continue

        os.system("clear")
        comment.fill_context(lines)

        # File range covered by the context, captured before refine_whitespace()
        # condenses the context lists
        context_start = comment.before_start
        context_end = comment.after_end

        # Check whether the context lines for this comment intersect the next match
        check_modified_context = next_range is not None and (
            comment.before_start <= next_range[0] < comment.start or
            comment.before_start <= next_range[1] < comment.start)

        # Display the lines
        file_name = path + (" (%d changes pending save)" % pending_saves if pending_saves else "")
        print(Style.BRIGHT + "Original (%s):" % file_name + Style.RESET_ALL)
        display_lines(comment.before, comment.lines, comment.after, comment.before_start, Fore.RED)

        # Reformat the comment
        refine_whitespace(comment)
        new_content = reformat_comment(comment)

        # Now show the guess, refined using our context information
        print(Style.BRIGHT + "Best guess:" + Style.RESET_ALL)
        display_lines(comment.before, new_content, comment.after, comment.before_start, Fore.GREEN)

        command = _read_command()

        if command == 'q':
            sys.exit(0)
        if command == 'f':
            break
        if command == 'i':
            ignores_out.append(digest)
            continue

        if command == 'd':
            overwrite_range(comment.start, comment.end, lines, [])
        elif command == 't':
            overwrite_range(comment.start, comment.end, lines, new_content)
        else:
            # 'v' edits the best guess, 'V' the original comment, both with context
            shown = comment.lines if command == 'V' else new_content
            result = _edit_in_vim(comment.before + shown + comment.after)
            overwrite_range(context_start, context_end, lines, result)

            # Check whether we modified the next match
            if check_modified_context:
                if len(result) < comment.before_len:
                    rescan_needed = True
                else:
                    for i, line in enumerate(comment.before):
                        if line != result[i] and next_range[0] <= comment.before_start + i <= next_range[1]:
                            rescan_needed = True
                            break

        pending_saves += 1

    if pending_saves:
        with open(path, "w") as file_obj:
            file_obj.write("\n".join(lines) + ('\n' if has_trailing_newline else ''))

    return rescan_needed, ignores_out


def main():
    """Walk the current directory and process every .cxx and .hxx file."""
    init()  # for Colorama

    for root, dirs, files in os.walk("."):
        for file_name in files:
            if file_name.endswith((".cxx", ".hxx")):
                path = os.path.join(root, file_name)

                # Keep reprocessing the file for as long as a rescan is requested,
                # carrying the ignored comments over from one pass to the next
                ignores = []
                rescan = True
                while rescan:
                    rescan, ignores = process_file(path, ignores)


if __name__ == "__main__":
    main()