# /usr/share/pyshared/closure_linter/tokenutil.py

#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Token utility functions."""

# Authors of this module, as "email (name)" strings.
__author__ = ('robbyw@google.com (Robert Walker)',
              'ajp@google.com (Andy Perelson)')

import copy
import StringIO

from closure_linter.common import tokens
from closure_linter.javascripttokens import JavaScriptToken
from closure_linter.javascripttokens import JavaScriptTokenType

# Shorthand: module-level alias so the token types defined on
# tokens.TokenType can be written as Type.<NAME> below.
Type = tokens.TokenType


def GetFirstTokenInSameLine(token):
  """Walks backwards from token to the token that starts its line.

  Args:
    token: Any token in the line.

  Returns:
    The first token (according to IsFirstInLine) on the line containing
    token.  This is token itself when it already starts the line.
  """
  current = token
  while True:
    if current.IsFirstInLine():
      return current
    current = current.previous


def GetFirstTokenInPreviousLine(token):
  """Finds the token that starts the line preceding token's line.

  Args:
    token: Any token in the line.

  Returns:
    The first token of the line before the one containing token, or None if
    token is on the first line (nothing precedes the start of its line).
  """
  line_start = GetFirstTokenInSameLine(token)
  # Whatever precedes the start of this line belongs to the previous line.
  token_before_line = line_start.previous
  if not token_before_line:
    return None

  return GetFirstTokenInSameLine(token_before_line)


def GetLastTokenInSameLine(token):
  """Walks forwards from token to the token that ends its line.

  Args:
    token: Any token in the line.

  Returns:
    The last token (according to IsLastInLine) on the line containing token.
    This is token itself when it already ends the line.
  """
  current = token
  while True:
    if current.IsLastInLine():
      return current
    current = current.next


def GetAllTokensInSameLine(token):
  """Collects every token on the line that contains token.

  Args:
    token: Any token in the line.

  Returns:
    A list of the tokens on that line, ordered from the first token of the
    line to the last one (both included).
  """
  line_end = GetLastTokenInSameLine(token)
  cursor = GetFirstTokenInSameLine(token)

  collected = []
  # Gather everything before the final token, then add the final token itself.
  while cursor != line_end:
    collected.append(cursor)
    cursor = cursor.next
  collected.append(line_end)

  return collected


def CustomSearch(start_token, func, end_func=None, distance=None,
                 reverse=False):
  """Scans away from start_token for the first token accepted by func.

  start_token itself is never tested; the scan begins with its neighbour.

  Args:
    start_token: The token to start searching from
    func: The function to call to test a token for applicability
    end_func: The function to call to test a token to determine whether to abort
          the search.
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token for which func is true within distance of start_token, or
    None if the chain ends, the distance is used up, or end_func matches a
    token first.
  """
  # The forward and backward scans differ only in which link is followed.
  link = 'previous' if reverse else 'next'
  remaining = distance
  current = start_token

  while current:
    if remaining is not None and not remaining > 0:
      break

    candidate = getattr(current, link)
    if candidate:
      # func is consulted before end_func, so a token matching both is a hit.
      if func(candidate):
        return candidate
      if end_func and end_func(candidate):
        return None

    current = candidate
    if remaining is not None:
      remaining -= 1

  return None


def Search(start_token, token_types, distance=None, reverse=False):
  """Finds the nearest token whose type is one of token_types.

  Args:
    start_token: The token to start searching from
    token_types: The allowable types of the token being searched for
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token of any type in token_types within distance of this token, or
    None if no such token is found.
  """
  def _HasWantedType(candidate):
    return candidate.IsAnyType(token_types)

  return CustomSearch(start_token, _HasWantedType, end_func=None,
                      distance=distance, reverse=reverse)


def SearchExcept(start_token, token_types, distance=None, reverse=False):
  """Finds the nearest token whose type is NOT one of token_types.

  Args:
    start_token: The token to start searching from
    token_types: The unallowable types of the token being searched for
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token within distance of this token whose type is not in
    token_types, or None if no such token is found.
  """
  def _LacksListedType(candidate):
    return not candidate.IsAnyType(token_types)

  return CustomSearch(start_token, _LacksListedType, end_func=None,
                      distance=distance, reverse=reverse)


def SearchUntil(start_token, token_types, end_types, distance=None,
                reverse=False):
  """Finds the nearest token of a wanted type, giving up at an end type.

  Args:
    start_token: The token to start searching from.
    token_types: The allowable types of the token being searched for.
    end_types: Types of tokens to abort search if we find.
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token of any type in token_types within distance of this token
    before any tokens of type in end_types, or None if no such token is found.
  """
  def _HasWantedType(candidate):
    return candidate.IsAnyType(token_types)

  def _HasEndType(candidate):
    return candidate.IsAnyType(end_types)

  return CustomSearch(start_token, _HasWantedType, end_func=_HasEndType,
                      distance=distance, reverse=reverse)


def DeleteToken(token):
  """Unlinks the given token from the doubly linked token list.

  Args:
    token: The token to delete
  """
  # The deleted token keeps its own previous/next pointers so that they still
  # lead to live tokens.  Tracking every previously deleted token in order to
  # patch its pointers later would be impractical, so the token is only
  # flagged: list iteration code can check is_deleted and skip past it.
  token.is_deleted = True

  before = token.previous
  after = token.next

  if before:
    before.next = after

  if not after:
    return

  after.previous = before

  # Consecutive following tokens that recorded the deleted token as their
  # last_code inherit the deleted token's own last_code instead.
  follower = after
  while follower:
    if not follower.metadata.last_code == token:
      break
    follower.metadata.last_code = token.metadata.last_code
    follower = follower.next


def DeleteTokens(token, token_count):
  """Deletes the given number of tokens starting with the given token.

  The tokens following token are removed first, one at a time, and token
  itself is removed last.  If the list ends before token_count tokens have
  been removed, deletion stops at the end of the list instead of failing.

  Args:
    token: The token to start deleting at.
    token_count: The total number of tokens to delete.
  """
  # range (rather than xrange) keeps this loop valid on Python 2 and 3.
  for _ in range(1, token_count):
    following = token.next
    if not following:
      # Fewer tokens remain than were requested; nothing more to unlink.
      break
    # token.next is re-read each round because DeleteToken relinks it to the
    # token after the one just removed.
    DeleteToken(following)
  DeleteToken(token)


def InsertTokenBefore(new_token, token):
  """Insert new_token before token.

  Splices new_token into the doubly linked token list directly ahead of token,
  gives it a shallow copy of token's metadata, and, when new_token has no
  start_index yet, assigns one and shifts the start_index of the tokens that
  follow it on the same line.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.next = token
  new_token.previous = token.previous

  # Shallow copy, so new_token starts with the same metadata values as token
  # without sharing the metadata object itself.
  new_token.metadata = copy.copy(token.metadata)

  if new_token.IsCode():
    # Repoint last_code to new_token on token and on each consecutive
    # following token whose last_code still equals token's old value.
    old_last_code = token.metadata.last_code
    following_token = token
    while (following_token and
           following_token.metadata.last_code == old_last_code):
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  # Finish the splice: fix the back link of token and the forward link of the
  # token that used to precede it (if any).
  token.previous = new_token
  if new_token.previous:
    new_token.previous.next = new_token

  # Positions are only computed for tokens that arrive without a start_index.
  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      # Same line: new_token takes over the position token occupied.
      new_token.start_index = token.start_index
    else:
      # Different line: place new_token right after the end of the preceding
      # token, or at index 0 when nothing precedes it.
      previous_token = new_token.previous
      if previous_token:
        new_token.start_index = (previous_token.start_index +
                                 len(previous_token.string))
      else:
        new_token.start_index = 0

    # Shift every later token on new_token's line by the inserted length.
    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next


def InsertTokenAfter(new_token, token):
  """Insert new_token after token.

  Splices new_token into the doubly linked token list directly behind token,
  gives it a shallow copy of token's metadata, and, when new_token has no
  start_index yet, assigns one and shifts the start_index of the tokens that
  follow it on the same line.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.previous = token
  new_token.next = token.next

  # Shallow copy, so new_token starts with the same metadata values as token
  # without sharing the metadata object itself.
  new_token.metadata = copy.copy(token.metadata)

  # If token is code, it is the last_code of the token placed right after it;
  # otherwise the copied last_code value is kept.
  if token.IsCode():
    new_token.metadata.last_code = token

  if new_token.IsCode():
    # Consecutive following tokens that recorded token as their last_code now
    # record new_token instead.  token.next is still the old successor here
    # because the forward link is only rewritten below.
    following_token = token.next
    while following_token and following_token.metadata.last_code == token:
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  # Finish the splice: fix the forward link of token and the back link of the
  # token that used to follow it (if any).
  token.next = new_token
  if new_token.next:
    new_token.next.previous = new_token

  # Positions are only computed for tokens that arrive without a start_index.
  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      # Same line: new_token begins where token's text ends.
      new_token.start_index = token.start_index + len(token.string)
    else:
      # Different line: new_token is treated as the start of its line.
      new_token.start_index = 0

    # Shift every later token on new_token's line by the inserted length.
    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next


def InsertTokensAfter(new_tokens, token):
  """Inserts a sequence of tokens, in order, directly after token.

  Args:
    new_tokens: An array of tokens to be added to the stream
    token: A token already in the stream
  """
  # TODO(user): It would be nicer to have InsertTokenAfter defer to here
  # instead of vice-versa.
  anchor = token
  for addition in new_tokens:
    # Each token goes after the one inserted just before it, which keeps the
    # order of new_tokens intact in the stream.
    InsertTokenAfter(addition, anchor)
    anchor = addition


def InsertSpaceTokenAfter(token):
  """Inserts a space token after the given token.

  The new token is a single-space WHITESPACE token that reuses the line text
  and line number of the given token.

  Args:
    token: The token to insert a space token after

  Returns:
    The single space token that was inserted.
  """
  space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line,
                                token.line_number)
  InsertTokenAfter(space_token, token)
  # The docstring has always promised the new token; actually hand it back.
  return space_token


def InsertBlankLineAfter(token):
  """Inserts a blank line after the given token.

  The blank line is a single BLANK_LINE token with an empty string, numbered
  one line past the given token.  The insertion itself, and the renumbering
  of the tokens after it, is done by InsertLineAfter.

  Args:
    token: The token to insert a blank line after

  Returns:
    The blank line token that was inserted.
  """
  blank_token = JavaScriptToken('', Type.BLANK_LINE, '',
                                token.line_number + 1)
  InsertLineAfter(token, [blank_token])
  return blank_token


def InsertLineAfter(token, new_tokens):
  """Inserts a new line consisting of new_tokens after the given token.

  The line_number of the tokens in new_tokens is left untouched; only the
  tokens that come after the inserted line are renumbered.

  Args:
    token: The token to insert after.
    new_tokens: The tokens that will make up the new line.  If empty, no line
        is inserted and the stream is left unchanged.
  """
  if not new_tokens:
    # Nothing to insert, so no later line numbers may be shifted either.
    return

  InsertTokensAfter(new_tokens, token)

  # Update all subsequent line numbers.
  next_token = new_tokens[-1].next
  while next_token:
    next_token.line_number += 1
    next_token = next_token.next


def SplitToken(token, position):
  """Splits the token into two tokens at position.

  The given token keeps string[:position].  The remainder moves into a new
  token of the same type, line and line number, which is inserted directly
  after it.

  Args:
    token: The token to split
    position: The position to split at. Will be the beginning of second token.

  Returns:
    The new second token.
  """
  new_string = token.string[position:]
  token.string = token.string[:position]

  new_token = JavaScriptToken(new_string, token.type, token.line,
                              token.line_number)
  # A split does not make the line any longer, so position the second half
  # explicitly.  InsertTokenAfter treats a token whose start_index is None as
  # newly added text and shifts every later token on the line by its length,
  # which would corrupt their positions here.
  new_token.start_index = token.start_index + len(token.string)
  InsertTokenAfter(new_token, token)

  return new_token


def Compare(token1, token2):
  """Orders two tokens by their position in the token stream.

  Tokens are ordered by line number first; tokens on the same line are
  ordered by start index.

  Args:
    token1: The first token to compare.
    token2: The second token to compare.

  Returns:
    A negative integer, zero, or a positive integer as the first token is
    before, equal, or after the second in the token stream.
  """
  if token1.line_number == token2.line_number:
    return token1.start_index - token2.start_index
  return token1.line_number - token2.line_number


def GoogScopeOrNoneFromStartBlock(token):
  """Finds the goog.scope call that a START_BLOCK token belongs to, if any.

  Args:
    token: A token of type START_BLOCK.

  Returns:
    The goog.scope function call token, or None if such call doesn't exist
    (including when token is not a START_BLOCK).
  """
  if token.type != JavaScriptTokenType.START_BLOCK:
    return None

  # A goog.scope statement puts its call token exactly 5 tokens before the
  # block.  Illustration of the tokens found prior to the start block:
  # goog.scope(function() {
  #      5    4    3   21 ^
  candidate = token
  steps_left = 5
  while steps_left and candidate:
    # Stepping off the front of the stream leaves candidate falsy, which
    # ends the walk early.
    candidate = candidate.previous
    steps_left -= 1

  if candidate and candidate.string == 'goog.scope':
    return candidate
  return None


def GetTokenRange(start_token, end_token):
  """Lists the tokens from start_token through end_token, inclusive.

  Args:
    start_token: Start token in the range.
    end_token: End token in the range.

  Returns:
    A list of tokens, in order, from start_token to end_token (including start
    and end).  Returns None if end_token is not reached by following next
    links from start_token, i.e. the tokens do not describe a valid range.
  """
  collected = []
  cursor = start_token

  while cursor:
    collected.append(cursor)
    if cursor == end_token:
      return collected
    cursor = cursor.next

  # Ran off the end of the stream without meeting end_token.
  return None


def TokensToString(token_iterable):
  """Convert a number of tokens into a string.

  Newlines will be inserted whenever the line_number of two neighboring
  tokens differ: one newline per line advanced when the line number grows, and
  a single newline when it goes backwards.

  Args:
    token_iterable: The tokens to turn to a string.

  Returns:
    A string representation of the given tokens, or '' if there are none.
  """
  token_list = list(token_iterable)
  if not token_list:
    return ''

  # Pieces are gathered in a list and joined once at the end.  This avoids the
  # Python-2-only StringIO module.
  pieces = []
  line_number = token_list[0].line_number

  for token in token_list:
    if token.line_number > line_number:
      # Skipped lines show up as blank lines in the output.
      pieces.append('\n' * (token.line_number - line_number))
      line_number = token.line_number
    elif token.line_number < line_number:
      # Line numbers went backwards: resynchronize with one line break.
      pieces.append('\n')
      line_number = token.line_number

    pieces.append(token.string)

  return ''.join(pieces)


def GetPreviousCodeToken(token):
  """Finds the closest code token that precedes the given token.

  A code token is one whose type is not in JavaScriptTokenType.NON_CODE_TYPES.

  Args:
    token: A token.

  Returns:
    The code token before the specified token or None if no such token
    exists.
  """
  def _IsCode(candidate):
    return (candidate and
            candidate.type not in JavaScriptTokenType.NON_CODE_TYPES)

  return CustomSearch(token, _IsCode, reverse=True)


def GetNextCodeToken(token):
  """Finds the closest code token that follows the given token.

  A code token is one whose type is not in JavaScriptTokenType.NON_CODE_TYPES.

  Args:
    token: A token.

  Returns:
    The next code token after the specified token or None if no such token
    exists.
  """
  def _IsCode(candidate):
    return (candidate and
            candidate.type not in JavaScriptTokenType.NON_CODE_TYPES)

  return CustomSearch(token, _IsCode, reverse=False)


def GetIdentifierStart(token):
  """Returns the first token in an identifier.

  Starting from a token that is part of an identifier, steps backwards over
  the preceding code tokens for as long as they are identifiers or dots.

  Args:
    token: A token which is part of an identifier.

  Returns:
    The token at the start of the identifier or None if the identifier was not
    of the form 'a.b.c' (e.g. "['a']['b'].c").
  """
  earliest = token

  while True:
    candidate = GetPreviousCodeToken(earliest)
    if not candidate:
      break
    if not (candidate.IsType(JavaScriptTokenType.IDENTIFIER) or
            _IsDot(candidate)):
      break
    earliest = candidate

  # Stopping on a dot means the chain hangs off something that is not a plain
  # identifier.
  return None if _IsDot(earliest) else earliest


def GetIdentifierForToken(token):
  """Get the symbol specified by a token.

  Starting at the given token, joins the pieces of a dotted symbol even when
  they are separated by whitespace, comments or newlines.

  The function will return None if the token is not the first token of an
  identifier.

  Args:
    token: The first token of a symbol.

  Returns:
    The whole symbol, as a string.
  """

  # Look backwards past whitespace and comments.  Meeting an identifier or a
  # dot there means this token continues an earlier symbol, so it should be
  # ignored and None is returned.
  cursor = token.previous
  while cursor:
    if cursor.IsType(JavaScriptTokenType.IDENTIFIER) or _IsDot(cursor):
      return None

    skippable = (cursor.IsType(tokens.TokenType.WHITESPACE) or
                 cursor.IsAnyType(JavaScriptTokenType.COMMENT_TYPES))
    if not skippable:
      break
    cursor = cursor.previous

  # A "function foo()" declaration.
  if token.type is JavaScriptTokenType.FUNCTION_NAME:
    return token.string

  # A "var foo" declaration (if the previous code token is 'var').
  code_token_before = GetPreviousCodeToken(token)
  if code_token_before and code_token_before.IsKeyword('var'):
    return token.string

  # Otherwise, this is potentially a namespaced (goog.foo.bar) identifier that
  # could span multiple lines or be broken up by whitespace, so its parts
  # have to be concatenated.
  identifier_types = set([
      JavaScriptTokenType.IDENTIFIER,
      JavaScriptTokenType.SIMPLE_LVALUE
      ])

  assert token.type in identifier_types

  # The symbol always begins with the token itself.
  parts = [token]

  if token.next:
    for candidate in token.next:
      # Whether the symbol gathered so far ends in a dot decides what may
      # legally come next: a name after a dot, a dot after a name.
      awaiting_name = parts[-1].string.endswith('.')

      if candidate.type in identifier_types:
        if not awaiting_name:
          break
        parts.append(candidate)
      elif _IsDot(candidate):
        if awaiting_name:
          break
        parts.append(candidate)
      elif candidate.type not in JavaScriptTokenType.NON_CODE_TYPES:
        # Any other code token ends the identifier; non-code tokens
        # (whitespace and the like) are simply skipped.
        break

  return ''.join([part.string for part in parts])


def GetStringAfterToken(token):
  """Get string after token.

  The search stops at the first end-of-string token.  An empty string has no
  Type.STRING_TEXT token, so without that limit the search would run on and
  return the text of a later string.  E.g.

    a = '';
    b = 'test';

  Searching after 'a' without the limit would return 'test', which is not
  desirable because an empty string comes before it.

  Args:
    token: Search will be done after this token.

  Returns:
    String if found after token else None (empty string will also
    return None).  In both cases there is no Type.STRING_TEXT to report.
  """
  string_end_types = [JavaScriptTokenType.SINGLE_QUOTE_STRING_END,
                      JavaScriptTokenType.DOUBLE_QUOTE_STRING_END]
  found = SearchUntil(token, JavaScriptTokenType.STRING_TEXT,
                      string_end_types)
  if not found:
    return None
  return found.string


def _IsDot(token):
  """Whether the token represents a "dot" operator (foo.bar).

  Args:
    token: The token to inspect.

  Returns:
    True if token is a NORMAL token whose string is exactly '.', else False.
  """
  if token.type is not tokens.TokenType.NORMAL:
    return False
  return token.string == '.'
# closure-linter 2.3.13-1 / usr / share / pyshared / closure_linter / tokenutil.py