# -*- coding: utf-8 -*-
import re
import string

# Matches any single character listed in ``string.punctuation``; the set is
# passed through ``re.escape`` so every character is taken literally inside
# the character class.
PUNCTUATION_REGEX = re.compile('[{0}]'.format(re.escape(string.punctuation)))


def strip_punc(s, all=False):
    """Strip punctuation from a string.

    Surrounding whitespace is trimmed first; punctuation is then removed
    either everywhere or only at the two ends of the trimmed string.

    :param s: The string.
    :param all: If true, delete every punctuation character. Otherwise only
        the leading and trailing punctuation is removed.
    """
    trimmed = s.strip()
    if not all:
        # Only peel punctuation off the edges; the interior is untouched.
        return trimmed.strip(string.punctuation)
    return PUNCTUATION_REGEX.sub('', trimmed)


def lowerstrip(s, all=False):
    """Lowercase a string, trim surrounding whitespace and strip punctuation.

    :param s: The string.
    :param all: Forwarded to ``strip_punc``: if true, every punctuation
        character is removed; otherwise only punctuation at the ends is.
    """
    lowered = s.lower()
    trimmed = lowered.strip()
    return strip_punc(trimmed, all=all)


def tree2str(tree, concat=' '):
    """Convert a nltk.tree.Tree to a string.

    Each item of ``tree`` is unpacked as a ``(word, tag)`` pair; the tags are
    discarded and the words are joined with ``concat``.

    For example:
        (NP a/DT beautiful/JJ new/JJ dashboard/NN)
            -> "a beautiful new dashboard"

    :param tree: An iterable of ``(word, tag)`` pairs.
    :param concat: The separator placed between words.
    """
    words = (token for token, _tag in tree)
    return concat.join(words)


def filter_insignificant(chunk, tag_suffixes=('DT', 'CC', 'PRP$', 'PRP')):
    """Filter out insignificant (word, tag) tuples from a chunk of text.

    :param chunk: An iterable of ``(word, tag)`` pairs.
    :param tag_suffixes: Tag suffixes that mark a pair as insignificant. A
        pair is dropped when its tag ends with any of them.
    :returns: A list of the remaining ``(word, tag)`` tuples, in their
        original order.
    """
    # Materialize once: ``str.endswith`` needs a tuple to test several
    # suffixes in one call, and a one-shot iterable would otherwise be
    # exhausted after the first pair.
    suffixes = tuple(tag_suffixes)
    return [(word, tag) for word, tag in chunk if not tag.endswith(suffixes)]


def is_filelike(obj):
    """Return whether ``obj`` is a file-like object.

    An object counts as file-like here when it exposes a ``read`` attribute;
    nothing else about it is checked.
    """
    has_read = hasattr(obj, 'read')
    return has_read