#
# All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or
# its licensors.
#
# For complete copyright and license terms please see the LICENSE at the root of this
# distribution (the "License"). All use of this software is governed by the License,
# or, if provided, by the license below or the license accompanying this file. Do not
# remove or modify any license notices. This file is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# $Revision$

import codecs
import fnmatch
import os
import sys

BUFSIZE = 4096
BOMLEN = len(codecs.BOM_UTF8)

NO_BOM_MESSAGE =        '  No BOM:         {}'
REMOVING_BOM_MESSAGE =  '  Removing BOM:   {}'
READ_ONLY_BOM_MESSAGE = '! ReadOnly BOM:   {}'
NON_EXISTANCE_MESSAGE = '! Does not exist: {}'


def file_has_bom(path):
    print("Scanning {}".format(path))
    with open(path, "rb") as fp:
        chunk = fp.read(len(codecs.BOM_UTF8))
        return chunk.startswith(codecs.BOM_UTF8)


def remove_bom_from_file(path):
    print("... removing bom from {}".format(path))
    with open(path, "r+b") as fp:
        chunk = fp.read(BUFSIZE)
        assert(chunk.startswith(codecs.BOM_UTF8))
        i = 0
        chunk = chunk[BOMLEN:]
        while chunk:
            fp.seek(i)
            fp.write(chunk)
            i += len(chunk)
            fp.seek(BOMLEN, os.SEEK_CUR)
            chunk = fp.read(BUFSIZE)
        fp.seek(-BOMLEN, os.SEEK_CUR)
        fp.truncate()


def remove_bom_from_directory(path, pattern):
    for child_name in os.listdir(path):
        child_path = os.path.join(path, child_name)
        if os.path.isdir(child_path):
            remove_bom_from_directory(child_path, pattern)
        elif fnmatch.fnmatch(child_name, pattern):
            if file_has_bom(child_path):
                if os.access(child_path, os.W_OK):
                    print(REMOVING_BOM_MESSAGE.format(child_path))
                    remove_bom_from_file(child_path)
                else:
                    print(READ_ONLY_BOM_MESSAGE.format(child_path))
            else:
                # print NO_BOM_MESSAGE.format(child_path)
                pass


if len(sys.argv) < 2:
    print('Usage: remove_bom <PATTERN> [<DIR>...]')
    print('')
    print('Will remove UTF-8 BOM from files with names that match <PATTERN>')
    print('found in the specified directories, recursively. If no directories')
    print('are specified, the current working directory is used.')
    exit(1)

pattern = sys.argv[1]
directories = sys.argv[2:] or [os.getcwd()]
for directory in directories:
    if os.path.isdir(directory):
        remove_bom_from_directory(directory, pattern)
    else:
        NON_EXISTANCE_MESSAGE.format(directory)