package com.rojoma.json
package io

import java.io.Reader

import scala.annotation.switch

/** Convert a character-stream into a token-stream.
  *
  * This is guaranteed to read no more than necessary to ensure it has
  * reached the end of a single token.  For objects, arrays, and
  * strings, it will read only up to (and, of course, including) the
  * closing delimiter.  For other types, it may read one character
  * further to assure itself that it has reached the end.
  *
  * A `JsonTokenIterator` does many small reads; it may be a good idea
  * to wrap the input `Reader` into a `BufferedReader`.  If you do not
  * need to read non-JSON out of the underlying `Reader` afterward, a
  * [[com.rojoma.json.io.BlockJsonTokenIterator]] may be faster.
  *
  * As extensions to standard JSON, this reader supports single-quoted
  * strings and Javascript-style comments.
  *
  * @see [[com.rojoma.json.io.BlockJsonTokenIterator]]
  * @see [[com.rojoma.json.io.JsonTokenGenerator]]
  * @see [[com.rojoma.json.io.JsonToken]]
  */
class JsonTokenIterator(reader: Reader) extends BufferedIterator[JsonToken] {
  // Single character of lookahead.  `peeked` is only meaningful while
  // `isPeeked` is true.
  private var isPeeked: Boolean = false
  private var peeked: Char = _

  // Token produced by `advance()` that has not yet been handed out by
  // `next()`; null when nothing is buffered.
  private var nextToken: JsonToken = null

  private var nextCharRow = 1 // This is the position of the next char returned from "nextChar()" or "peekChar()"
  private var nextCharCol = 1

  /** Throws a [[JsonUnexpectedCharacter]] describing `receivedChar` found at
    * (`row`, `col`) where `expected` was wanted.  Never returns.
    */
  private def lexerError(receivedChar: Char, expected: String, row: Int, col: Int): Nothing =
    throw new JsonUnexpectedCharacter(receivedChar, expected, Position(row, col))

  /** Consumes and returns the next character, updating the row/column
    * bookkeeping: a newline moves to column 1 of the next row, any other
    * character advances the column by one.
    *
    * Throws `JsonLexerEOF` (via `peekChar()`) if the input is exhausted.
    */
  private def nextChar(): Char = {
    val c = peekChar()
    isPeeked = false
    if(c == '\n') { nextCharRow += 1; nextCharCol = 1 }
    else nextCharCol += 1
    c
  }

  /** Returns the next character without consuming it, reading it from the
    * underlying reader if it is not already buffered.
    *
    * Throws `JsonLexerEOF` positioned at the current row/column if the input
    * is exhausted.
    */
  private def peekChar(): Char = {
    // atEOF() fills the lookahead slot as a side effect when a character is available.
    if(atEOF()) throw new JsonLexerEOF(Position(nextCharRow, nextCharCol))
    peeked
  }

  /** Returns true if no further character can be read.  When a character is
    * available it is left in the lookahead slot, so a following `peekChar()`
    * or `nextChar()` will not touch the reader again.
    */
  private def atEOF(): Boolean = {
    if(isPeeked) {
      false
    } else {
      val newChar = reader.read()
      if(newChar == -1) {
        true
      } else {
        peeked = newChar.toChar
        isPeeked = true
        false
      }
    }
  }

  /** Consumes characters up to, but not including, the next newline or end of input. */
  private def skipToEndOfLine(): Unit =
    while(!atEOF() && peekChar() != '\n') nextChar()

  /** Consumes the remainder of a block comment, including the closing
    * "*" "/" pair.  The opening delimiter must already have been consumed.
    *
    * Throws `JsonLexerEOF` if the input ends before the comment is closed.
    */
  private def skipBlockComment(): Unit = {
    var last = nextChar()
    while(last != '*' || peekChar() != '/') last = nextChar()
    nextChar() // skip final '/'
  }

  /** Consumes a comment whose leading '/' is the next character: either a
    * line comment (second character '/') or a block comment (second
    * character '*').  Any other second character is a lexer error reported
    * at that character's position.
    */
  private def skipComment(): Unit = {
    nextChar() // skip opening "/"
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case '/' => skipToEndOfLine()
      case '*' => skipBlockComment()
      case c => lexerError(c, "/ or *", row, col)
    }
  }

  /** Consumes any run of whitespace and comments preceding the next token. */
  @annotation.tailrec
  private def skipWhitespace(): Unit = {
    while(!atEOF() && Character.isWhitespace(peekChar())) nextChar()
    // A comment may itself be followed by more whitespace/comments, so go around again.
    if(!atEOF() && peekChar() == '/') { skipComment(); skipWhitespace() }
  }

  /** Returns true if another token is available, lexing it if necessary.
    * Lexer exceptions raised while reading that token propagate to the caller.
    */
  def hasNext: Boolean = {
    if(nextToken == null) advance()
    nextToken != null
  }

  /** Returns the next token without consuming it.
    *
    * Throws `NoSuchTokenException` if the input holds no further tokens.
    */
  def head: JsonToken = {
    if(!hasNext) throw new NoSuchTokenException(Position(nextCharRow, nextCharCol))
    nextToken
  }

  /** Returns the next token and consumes it.
    *
    * Throws `NoSuchTokenException` if the input holds no further tokens.
    */
  def next(): JsonToken = {
    val result = head
    nextToken = null
    result
  }

  /** Lexes one token into `nextToken`, stamping it with the row/column at
    * which it started.  Leaves `nextToken` null if only whitespace and
    * comments remain before end of input.
    */
  private def advance(): Unit = {
    skipWhitespace()
    if(atEOF()) {
      nextToken = null
    } else {
      val tokenStartRow = nextCharRow
      val tokenStartCol = nextCharCol
      val token = (peekChar(): @switch) match {
        case '{' =>
          nextChar()
          TokenOpenBrace()
        case '}' =>
          nextChar()
          TokenCloseBrace()
        case '[' =>
          nextChar()
          TokenOpenBracket()
        case ']' =>
          nextChar()
          TokenCloseBracket()
        case ':' =>
          nextChar()
          TokenColon()
        case ',' =>
          nextChar()
          TokenComma()
        case '"' | '\'' => readString()
        case '-' => readNumber()
        case c =>
          if(isDigit(c)) readNumber() // should I inline this into a case '0' | '1' | ... | '9' ?
          else if(Character.isUnicodeIdentifierStart(c)) readIdentifier()
          else lexerError(c, "start of datum", nextCharRow, nextCharCol)
      }
      token.row = tokenStartRow
      token.column = tokenStartCol
      nextToken = token
    }
  }

  /** True for the ASCII digits '0' through '9' only. */
  private def isDigit(c: Char) = '0' <= c && c <= '9'

  /** Consumes and returns one character, which must be an ASCII digit;
    * anything else is a lexer error reported at that character's position.
    */
  private def readDigit() = {
    val row = nextCharRow
    val col = nextCharCol
    val c = nextChar()
    if(!isDigit(c)) lexerError(c, "digit", row, col)
    c
  }

  /** Appends one or more consecutive digits to `sb`.  The first character
    * is required to be a digit (lexer error otherwise); reading stops at the
    * first non-digit or at end of input, without consuming it.
    */
  private def readDigits(sb: StringBuilder): Unit = {
    sb += readDigit()
    while(!atEOF() && isDigit(peekChar())) sb += nextChar()
  }

  /** Reads a number token.
    *
    * Throws `JsonNumberOutOfRange`, positioned at the start of the number,
    * if `BigDecimal` rejects the collected text with a `NumberFormatException`.
    */
  private def readNumber() = {
    // JSON numbers match (a subset of) the language generated by
    // the regular expression:
    //    -?\d+(\.\d+)?([eE][+-]?\d+)?
    // In particular, JSON restricts leading zeros, but we'll match
    // the whole thing anyway.
    val sb = new StringBuilder

    val startRow = nextCharRow
    val startCol = nextCharCol

    if(peekChar() == '-') sb += nextChar()

    readDigits(sb)

    val hasFrac = !atEOF() && peekChar() == '.'
    if(hasFrac) {
      sb += nextChar() // keep the decimal point
      readDigits(sb)
    }

    val hasExponent = !atEOF() && (peekChar() == 'e' || peekChar() == 'E')
    if(hasExponent) {
      sb += nextChar() // keep the e/E
      if(peekChar() == '-') sb += nextChar()
      else if(peekChar() == '+') nextChar() // just skip it
      readDigits(sb)
    }

    val n = sb.toString
    try {
      TokenNumber(BigDecimal(n, java.math.MathContext.UNLIMITED))
    } catch {
      case _: NumberFormatException =>
        throw new JsonNumberOutOfRange(n, Position(startRow, startCol))
    }
  }

  /** Reads an identifier token: the next character (already checked by the
    * caller to be an identifier start) followed by any run of Unicode
    * identifier-part characters.
    */
  private def readIdentifier() = {
    val sb = new StringBuilder
    sb += nextChar()
    while(!atEOF() && Character.isUnicodeIdentifierPart(peekChar())) sb += nextChar()
    TokenIdentifier(sb.toString())
  }

  /** Reads a string token.  The next character is the opening quote (single
    * or double); the string runs until the same unescaped quote character.
    *
    * Throws `JsonLexerEOF` if the input ends before the closing quote.
    */
  private def readString() = {
    val sb = new StringBuilder
    val boundary = nextChar()
    while(peekChar() != boundary) {
      readPotentialSurrogatePairInto(sb, readChar(), boundary)
    }
    nextChar() // skip closing character
    TokenString(sb.toString)
  }

  /** Appends the already-read string character `c` to `sb`, consuming a
    * following character when `c` is a high surrogate so that the pair can
    * be validated.  Any surrogate that is not part of a well-formed
    * high/low pair is replaced by U+FFFD.
    *
    * @param sb          buffer accumulating the string's contents
    * @param c           the (already unescaped) character just read
    * @param endOfString the quote character that terminates the string
    */
  @annotation.tailrec
  private def readPotentialSurrogatePairInto(sb: StringBuilder, c: Char, endOfString: Char): Unit = {
    val badChar = 0xfffd.toChar
    val isSurrogate = c >= Character.MIN_SURROGATE && c <= Character.MAX_SURROGATE
    if(!isSurrogate) {
      sb += c
    } else if(!Character.isHighSurrogate(c) || peekChar() == endOfString) {
      // Either a lone low surrogate, or a high surrogate immediately
      // followed by the closing quote: nothing can complete the pair.
      sb += badChar
    } else {
      val potentialSecondHalf = readChar()
      if(Character.isLowSurrogate(potentialSecondHalf)) {
        sb += c
        sb += potentialSecondHalf
      } else {
        // The high surrogate is unpaired; the character after it still
        // needs the same treatment (it may itself start a pair).
        sb += badChar
        readPotentialSurrogatePairInto(sb, potentialSecondHalf, endOfString)
      }
    }
  }

  /** Reads one logical character from inside a string, decoding a backslash escape if present. */
  private def readChar() = {
    nextChar() match {
      case '\\' => readEscapedCharacter()
      case c => c
    }
  }

  /** Decodes the escape whose backslash has already been consumed.  An
    * unrecognised escape character is a lexer error reported at that
    * character's position.
    */
  private def readEscapedCharacter(): Char = {
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case '"' => '"'
      case '\'' => '\''
      case '\\' => '\\'
      case '/' => '/'
      case 'b' => '\b'
      case 'f' => '\f'
      case 'n' => '\n'
      case 'r' => '\r'
      case 't' => '\t'
      case 'u' => readUnicodeCharacter()
      case c => lexerError(c, "string escape character", row, col)
    }
  }

  /** Reads the four hex digits of a `\``u` escape and combines them into a single UTF-16 code unit. */
  private def readUnicodeCharacter(): Char = {
    // The multi-name val evaluates readHexDigit() once per name, in order.
    val h1, h2, h3, h4 = readHexDigit()
    ((h1 << 12) | (h2 << 8) | (h3 << 4) | h4).toChar
  }

  /** Consumes one hexadecimal digit (either case) and returns its value,
    * 0 to 15.  Anything else is a lexer error reported at that character's
    * position.
    */
  private def readHexDigit(): Int = {
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case c if isDigit(c) => c.toInt - '0'.toInt
      case c if 'a' <= c && c <= 'f' => 10 + c.toInt - 'a'.toInt
      case c if 'A' <= c && c <= 'F' => 10 + c.toInt - 'A'.toInt
      case c => lexerError(c, "hex digit", row, col)
    }
  }
}
