# Lexical analyzer which retokenizes pp-tokens into c-tokens.
#
# Author::    Yutaka Yanoh <mailto:yanoh@users.sourceforge.net>
# Copyright:: Copyright (C) 2010-2012, OGIS-RI Co.,Ltd.
# License::   GPLv3+: GNU General Public License version 3 or later
#
# Owner::     Yutaka Yanoh <mailto:yanoh@users.sourceforge.net>

#--
#     ___    ____  __    ___   _________
#    /   |  / _  |/ /   / / | / /__  __/           Source Code Static Analyzer
#   / /| | / / / / /   / /  |/ /  / /                   AdLint - Advanced Lint
#  / __  |/ /_/ / /___/ / /|  /  / /
# /_/  |_|_____/_____/_/_/ |_/  /_/   Copyright (C) 2010-2012, OGIS-RI Co.,Ltd.
#
# This file is part of AdLint.
#
# AdLint is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# AdLint is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# AdLint.  If not, see <http://www.gnu.org/licenses/>.
#
#++

require "adlint/lexer"
require "adlint/c/scanner"
require "adlint/c/scope"

module AdLint #:nodoc:
module C #:nodoc:

  # Lexical analyzer which retokenizes preprocessing tokens (pp-tokens) into
  # C language tokens (c-tokens).
  #
  # In addition to the one-by-one retokenization, this lexer
  #
  # * turns an IDENTIFIER token whose value is a registered typedef name into
  #   a TYPEDEF_NAME token (see #translate_identifier), and
  # * merges adjacent string literals into one STRING_LITERAL token (see
  #   #concat_contiguous_string_literals).
  #
  # Typedef names and object names are registered by the caller through
  # #add_typedef_name and #add_object_name, and are scoped by #enter_scope and
  # #leave_scope.
  class Lexer < TokensRelexer
    # Creates a lexer reading the pp-tokens of +c_source+.
    #
    # +c_source+ must respond to +pp_tokens+ (handed to the superclass) and
    # +root_fpath+ (exposed as #translation_unit_fpath).
    def initialize(c_source)
      super(c_source.pp_tokens)
      @translation_unit_fpath = c_source.root_fpath

      # The token most recently returned by #tokenize.
      @last_token = nil
      # A token read ahead by #concat_contiguous_string_literals which has to
      # be returned by the following #tokenize call.
      @next_token = nil
      @typedef_names = ScopedNameSet.new
      @object_names = ScopedNameSet.new
      @identifier_translation = true
    end

    # The value of +root_fpath+ of the source given to the constructor.
    attr_reader :translation_unit_fpath

    # Registers the value of +token+ as a typedef name in the current scope.
    def add_typedef_name(token)
      @typedef_names.add(token.value)
    end

    # Registers the value of +token+ as an object name in the current scope.
    # A registered object name prevents an identifier of the same spelling
    # from being translated into a TYPEDEF_NAME.
    def add_object_name(token)
      @object_names.add(token.value)
    end

    # Opens a nested scope for both typedef names and object names.
    def enter_scope
      @typedef_names.enter_scope
      @object_names.enter_scope
    end

    # Closes the innermost scope of both typedef names and object names.
    def leave_scope
      @typedef_names.leave_scope
      @object_names.leave_scope
    end

    # Enables the IDENTIFIER to TYPEDEF_NAME translation (the initial state).
    def start_identifier_translation
      @identifier_translation = true
    end

    # Disables the IDENTIFIER to TYPEDEF_NAME translation.
    def stop_identifier_translation
      @identifier_translation = false
    end

    private
    # Wraps +token_array+ into the context object handed to #tokenize.
    # NOTE(review): Presumably a hook invoked by TokensRelexer -- confirm
    #               against the superclass.
    def create_context(token_array)
      LexerContext.new(TokensContent.new(token_array))
    end

    # Returns the next c-token, or nil when no further token can be produced.
    #
    # A token read ahead by a previous call is returned first.  Otherwise
    # pp-tokens are consumed from +context+ until one of them can be
    # retokenized; pp-tokens matching none of the retokenizers are skipped.
    def tokenize(context)
      if @next_token
        token = @next_token
        @next_token = nil
      else
        until context.content.empty?
          pp_token = context.content.next_token
          type_hint = pp_token.type_hint

          if type_hint
            # The pp-token already knows its c-token type.
            token = pp_token.class.new(type_hint, pp_token.value,
                                       pp_token.location)
          else
            # NOTE: The order matters.  Keywords and the null constant also
            #       match the identifier pattern, so that they have to be
            #       examined before the identifier.
            token = retokenize_keyword(pp_token, context)        ||
                    retokenize_constant(pp_token, context)       ||
                    retokenize_string_literal(pp_token, context) ||
                    retokenize_null_constant(pp_token, context)  ||
                    retokenize_identifier(pp_token, context)     ||
                    retokenize_punctuator(pp_token, context)
          end

          break if token
        end
      end

      return nil unless token

      case token.type
      when :IDENTIFIER
        token = translate_identifier(token, context)
      when :STRING_LITERAL
        token = concat_contiguous_string_literals(token, context)
      end
      @last_token = token
    end

    # Returns a TYPEDEF_NAME token replacing the IDENTIFIER +token+ when its
    # value is a registered typedef name, otherwise returns +token+ itself.
    #
    # No translation takes place when
    #
    # * the translation is stopped by #stop_identifier_translation,
    # * the value is registered as an object name, or
    # * the previous token is `struct', `union', `enum', `->' or `.'.
    def translate_identifier(token, context)
      return token unless @identifier_translation
      return token unless token.type == :IDENTIFIER
      return token if @object_names.include?(token.value)
      return token unless @typedef_names.include?(token.value)

      if @last_token &&
         (@last_token.type == :STRUCT || @last_token.type == :UNION ||
          @last_token.type == :ENUM ||
          @last_token.type == "->" || @last_token.type == ".")
        return token
      end

      token.class.new(:TYPEDEF_NAME, token.value, token.location)
    end

    # Returns a STRING_LITERAL token made by joining +token+ with the string
    # literals directly following it, or +token+ itself when the following
    # token is not a string literal.
    #
    # A run of three or more literals is handled by the recursion through
    # #tokenize, which concatenates the rest of the run before returning.
    # A following token which is not a string literal is kept in @next_token
    # in order to be returned by the next #tokenize call.
    def concat_contiguous_string_literals(token, context)
      return token if context.content.empty?

      next_token = tokenize(context)
      # NOTE: #tokenize returns nil when none of the remaining pp-tokens can
      #       be retokenized.  Nothing is left to concatenate in that case.
      return token unless next_token

      if next_token.type == :STRING_LITERAL
        # Drop the closing quote of the former literal and the opening quote
        # (with the optional `L' prefix) of the latter one.
        token.class.new(token.type, token.value.sub(/"\z/, "") +
                        next_token.value.sub(/\AL?"/, ""),
                        token.location)
      else
        @next_token = next_token
        token
      end
    end

    # Returns a keyword token typed by Scanner::KEYWORDS when the value of
    # +pp_token+ is a key of that table, otherwise nil.
    def retokenize_keyword(pp_token, context)
      keyword = Scanner::KEYWORDS[pp_token.value]
      return nil unless keyword
      pp_token.class.new(keyword, pp_token.value, pp_token.location)
    end

    # Returns a CONSTANT token when the value of +pp_token+ is spelled as a
    # character, floating or integer constant, otherwise nil.
    def retokenize_constant(pp_token, context)
      # NOTE: For extended bit-access operators.
      #       A pp-token directly following an identifier is never treated as
      #       a constant.
      return nil if @last_token && @last_token.type == :IDENTIFIER

      case pp_token.value
      when /\AL?'.*'\z/,
           # Character constant with the optional `L' prefix.
           /\A(?:[0-9]*\.[0-9]+|[0-9]+\.)[FL]*\z/i,
           # Floating constant without an exponent part.
           /\A(?:[0-9]*\.[0-9]*E[+-]?[0-9]+|[0-9]+\.?E[+-]?[0-9]+)[FL]*\z/i,
           # Floating constant with an exponent part.
           /\A(?:0x[0-9a-f]+|0b[01]+|[0-9]+)[UL]*\z/i
           # Hexadecimal, binary or plain digit-sequence integer constant.
        pp_token.class.new(:CONSTANT, pp_token.value, pp_token.location)
      else
        nil
      end
    end

    # Returns a STRING_LITERAL token when the value of +pp_token+ is enclosed
    # by double quotes (with the optional `L' prefix), otherwise nil.
    def retokenize_string_literal(pp_token, context)
      return nil unless pp_token.value =~ /\AL?".*"\z/
      pp_token.class.new(:STRING_LITERAL, pp_token.value, pp_token.location)
    end

    # Returns a NULL token when the value of +pp_token+ is exactly "NULL",
    # otherwise nil.
    def retokenize_null_constant(pp_token, context)
      return nil unless pp_token.value == "NULL"
      pp_token.class.new(:NULL, pp_token.value, pp_token.location)
    end

    # Returns an IDENTIFIER token when the value of +pp_token+ consists of
    # letters, digits and underscores and does not start with a digit,
    # otherwise nil.
    def retokenize_identifier(pp_token, context)
      return nil unless pp_token.value =~ /\A[a-z_][a-z_0-9]*\z/i
      pp_token.class.new(:IDENTIFIER, pp_token.value, pp_token.location)
    end

    # Returns a punctuator token when the value of +pp_token+ is one of the
    # known punctuators, otherwise nil.
    #
    # The type of a punctuator token is the punctuator string itself.
    def retokenize_punctuator(pp_token, context)
      case pp_token.value
      when "{", "}", "(", ")", "[", "]", ";", ",", "::", ":", "?", "||",
           "|=", "|", "&&", "&=", "&", "^=", "^", "==", "=", "!=", "!",
           "<<=", "<=", "<<", "<", ">>=", ">=", ">>", ">", "+=", "++", "+",
           "->*", "->", "-=", "--", "-", "*=", "*", "/=", "/", "%=", "%",
           "...", ".*", ".", "~"
        pp_token.class.new(pp_token.value, pp_token.value, pp_token.location)
      else
        nil
      end
    end
  end

  # Set of names managed per nested scope.
  #
  # A name added in a scope is found by #include? until that scope is left.
  # The outermost (global) scope exists from the beginning.
  class ScopedNameSet
    # Creates a name set holding only the empty global scope.
    def initialize
      @name_stack = [Set.new]
      @scope_stack = [GlobalScope.new]
    end

    # Opens a new innermost scope having no names.
    def enter_scope
      @name_stack << Set.new
      # The number of the scopes already opened is given to the new scope.
      depth = @scope_stack.size
      @scope_stack << Scope.new(depth)
    end

    # Closes the innermost scope, forgetting all the names added to it.
    def leave_scope
      @name_stack.pop
      @scope_stack.pop
    end

    # Adds +name+ to the innermost scope.
    def add(name)
      innermost_names = @name_stack.last
      innermost_names << name
    end

    # Returns true if +name+ is added to any of the scopes currently opened.
    def include?(name)
      @name_stack.reverse_each.any? { |names| names.include?(name) }
    end
  end

end
end
