#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
#

import re

class CpyTokenizer:

    def __init__(self, cpydata):

        self.line_sep = r'[.,;]?$|[.,;]?\s'
        self.s_quote = r"'[^']*'"
        self.d_quote = r'"[^"]*"'
        self.reg_ex = re.compile("(%s|%s|%s)" % (self.line_sep, self.s_quote, self.d_quote))

        self.tokens = self.cpyTokenize(cpydata)


    def cpyClean(self, cpydata):
        
        cpydata = [ line[6:72].rstrip() for line in cpydata.split('\n') if len(line) > 6 and line[6] not in ('*','/') ]
        cpydata = [ line for line in cpydata if line.strip() not in ("EJECT", "SKIP1", "SKIP2", "SKIP3")]
        cpydata = [ line for line in cpydata if len(line) > 0]
        cpydata = ' '.join(cpydata)
        
        return cpydata


    def cpyTokenize(self, cpydata):

        clean_cpydata = self.cpyClean(cpydata)
        tokens = [token.strip() for token in re.split(self.reg_ex, clean_cpydata) if token.strip()]

        return tokens


    def getToken(self):

        if self.tokens:
            token = self.tokens.pop(0)
        else:
            token = None
        
        return token

    def putToken(self, token):

        if token:
            self.tokens.insert(0,token)