// Copyright (c) 2003-present, Jodd Team (http://jodd.org)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package jodd.lagarto.adapter;

import jodd.lagarto.Tag;
import jodd.lagarto.TagAdapter;
import jodd.lagarto.TagType;
import jodd.lagarto.TagVisitor;
import jodd.util.CharUtil;

import java.nio.CharBuffer;

/**
 * Tag adapter that reduces the size of HTML passing through it by dropping
 * comments and collapsing whitespace runs in text blocks.
 * <p>
 * Only the {@code comment}, {@code tag} and {@code text} callbacks are
 * overridden; text found between a {@code PRE} start tag and its end tag is
 * forwarded untouched. The number of characters removed so far is available
 * through {@link #getStrippedCharsCount()}.
 */
public class StripHtmlTagAdapter extends TagAdapter {

	/** Tag name whose content must keep its whitespace as-is. */
	private static final CharSequence PRE = "PRE";

	/**
	 * Extra characters counted for every dropped comment, on top of the comment
	 * body; presumably the combined length of the {@code <!--} and {@code -->}
	 * delimiters.
	 */
	private static final int COMMENT_DELIMITERS_LENGTH = 7;

	/** Running total of characters removed since the last {@link #start()}. */
	protected int strippedCharsCount;

	/** When {@code true}, text blocks are whitespace-collapsed; otherwise they pass through. */
	protected boolean strip;

	/**
	 * Creates an adapter that forwards the processed events to the given target.
	 *
	 * @param target visitor that receives the stripped output
	 */
	public StripHtmlTagAdapter(final TagVisitor target) {
		super(target);
	}

	/**
	 * Resets the stripping state and the counter, then delegates to the parent.
	 */
	@Override
	public void start() {
		strip = true;
		strippedCharsCount = 0;
		super.start();
	}

	/**
	 * Drops the comment: it is not forwarded to the target, and its length
	 * (plus the delimiter allowance) is added to the stripped counter.
	 *
	 * @param comment comment body as delivered by the parser
	 */
	@Override
	public void comment(final CharSequence comment) {
		final int removed = comment.length() + COMMENT_DELIMITERS_LENGTH;
		strippedCharsCount += removed;
	}

	/**
	 * Tracks entry into and exit from {@code PRE} elements and forwards the tag.
	 * Stripping is switched off by a {@code PRE} start tag and back on by a
	 * {@code PRE} end tag; all other tags leave the flag unchanged.
	 *
	 * @param tag tag being visited
	 */
	@Override
	public void tag(final Tag tag) {
		final TagType type = tag.getType();
		final boolean opening = (type == TagType.START);
		final boolean closing = (type == TagType.END);

		// Only start/end tags can toggle stripping, so the name is checked just for those.
		if ((opening || closing) && tag.nameEquals(PRE)) {
			strip = closing;
		}

		super.tag(tag);
	}

	/**
	 * Collapses every run of whitespace in the text into a single space and
	 * forwards the result. A non-empty chunk made up solely of whitespace is
	 * dropped entirely. When stripping is disabled the text is forwarded as-is.
	 *
	 * @param text text block as delivered by the parser
	 */
	@Override
	public void text(final CharSequence text) {
		if (strip) {
			final int length = text.length();
			// The collapsed text can never be longer than the input.
			final char[] buffer = new char[length];

			int written = 0;
			boolean lastWasWhitespace = false;

			for (int pos = 0; pos < length; pos++) {
				final char current = text.charAt(pos);

				if (!CharUtil.isWhitespace(current)) {
					buffer[written++] = current;
					lastWasWhitespace = false;
				} else if (!lastWasWhitespace) {
					// First whitespace of a run: keep one plain space, skip the rest.
					buffer[written++] = ' ';
					lastWasWhitespace = true;
				}
			}

			// Exactly one char written and it was whitespace: the whole chunk was blank.
			final boolean blankChunk = lastWasWhitespace && (written == 1);

			if (blankChunk) {
				strippedCharsCount += length;
			} else {
				super.text(CharBuffer.wrap(buffer, 0, written));
				strippedCharsCount += length - written;
			}
		} else {
			super.text(text);
		}
	}

	/**
	 * Returns the number of characters removed since parsing started.
	 *
	 * @return total count of stripped characters
	 */
	public int getStrippedCharsCount() {
		return strippedCharsCount;
	}
}
