// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.1
// Copyright (C) 2005 Martin Jericho
// http://sourceforge.net/projects/jerichohtml/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package au.id.jericho.lib.html;

import java.io.*;
import java.util.*;

/**
 * Represents a modified version of an original {@link Source} document.
 * <p>
 * An <code>OutputDocument</code> represents an original source document that
 * has been modified by substituting segments of it with other text.
 * Each of these substitutions is registered by {@linkplain #add(OutputSegment) adding} an {@link OutputSegment}
 * to the <code>OutputDocument</code>.
 * After all of the substitutions have been added, the modified text can be retrieved using the
 * {@link #output(Writer)}, {@link #toString()} or {@link #getReader()} methods.
 * <p>
 * The registered {@linkplain OutputSegment output segments} must not overlap each other, but may be adjacent.
 * Multiple output segments may be added at the same {@linkplain OutputSegment#getBegin() begin} position provided that they are all
 * zero-length, with the exception of one segment which may {@linkplain OutputSegment#getEnd() end} at a different position.
 * <p>
 * The following example converts all externally referenced style sheets to internal style sheets:
 * <pre>
 *  URL sourceUrl=new URL(sourceUrlString);
 *  String htmlText=Util.getString(new InputStreamReader(sourceUrl.openStream()));
 *  OutputDocument outputDocument=new OutputDocument(htmlText);
 *  Source source=new Source(htmlText);
 *  StringBuffer sb=new StringBuffer();
 *  List linkStartTags=source.findAllStartTags(Tag.LINK);
 *  for (Iterator i=linkStartTags.iterator(); i.hasNext();) {
 *    StartTag startTag=(StartTag)i.next();
 *    Attributes attributes=startTag.getAttributes();
 *    String rel=attributes.getValue("rel");
 *    if (!"stylesheet".equalsIgnoreCase(rel)) continue;
 *    String href=attributes.getValue("href");
 *    if (href==null) continue;
 *    String styleSheetContent;
 *    try {
 *      styleSheetContent=Util.getString(new InputStreamReader(new URL(sourceUrl,href).openStream()));
 *    } catch (Exception ex) {
 *      continue; // don't convert if URL is invalid
 *    }
 *    sb.setLength(0);
 *    sb.append("&lt;style");
 *    Attribute typeAttribute=attributes.get("type");
 *    if (typeAttribute!=null) sb.append(' ').append(typeAttribute);
 *    sb.append("&gt;\n").append(styleSheetContent).append("\n&lt;/style&gt;");
 *    outputDocument.add(new StringOutputSegment(startTag,sb.toString()));
 *  }
 *  String convertedHtmlText=outputDocument.toString();
 * </pre>
 *
 * @see OutputSegment
 * @see StringOutputSegment
 */
public final class OutputDocument {
	/** The unmodified source text; never <code>null</code> (enforced by the constructor). */
	private final CharSequence sourceText;

	/** The registered {@link OutputSegment} objects, in insertion order until {@link #output(Writer)} sorts them in place. */
	private final List outputSegments=new ArrayList();

	/**
	 * Constructs a new <code>OutputDocument</code> based on the specified source text.
	 * <p>
	 * Note that a {@link Source} object can be passed directly as an argument to this constructor
	 * as it implements the <code>CharSequence</code> interface.
	 *
	 * @param sourceText  the source text, must not be <code>null</code>.
	 * @throws IllegalArgumentException  if <code>sourceText</code> is <code>null</code>.
	 */
	public OutputDocument(final CharSequence sourceText) {
		if (sourceText==null) throw new IllegalArgumentException("sourceText argument must not be null");
		this.sourceText=sourceText;
	}

	/**
	 * Returns the original source text upon which this <code>OutputDocument</code> is based.
	 * @return the original source text upon which this <code>OutputDocument</code> is based.
	 */
	public CharSequence getSourceText() {
		return sourceText;
	}

	/**
	 * Adds the specified {@linkplain OutputSegment output segment} to this <code>OutputDocument</code>.
	 * <p>
	 * Note that for efficiency reasons this method does not throw an exception if the added output segment overlaps another,
	 * but an {@link OverlappingOutputSegmentsException} is thrown when the {@linkplain #output(Writer) output is generated}.
	 *
	 * @param outputSegment  the output segment to add.
	 */
	public void add(final OutputSegment outputSegment) {
		outputSegments.add(outputSegment);
	}

	/**
	 * Adds the specified {@link FormControl} to this <code>OutputDocument</code>.
	 * <p>
	 * The effect of this method is to {@linkplain #add(OutputSegment) add} zero or more
	 * {@linkplain OutputSegment output segments} to the output document as required to reflect
	 * previous modifications to the control's state.
	 * The state of a control includes its <a href="FormControl.html#SubmissionValue">submission value</a>,
	 * {@linkplain FormControl#setOutputStyle(FormControlOutputStyle) output style}, and whether it has been
	 * {@linkplain FormControl#setDisabled(boolean) disabled}.
	 * <p>
	 * The state of the form control should not be modified after this method is called, as there is no guarantee that
	 * subsequent changes either will or will not be reflected in the final output.
	 * A second call to this method with the same parameter is not allowed.
	 * It is therefore recommended to call this method as the last action before the output is generated.
	 * <p>
	 * Although the specifics of the number and nature of the output segments added in any particular circumstance
	 * are not defined in the specification, it can generally be assumed that only the minimum changes necessary
	 * are made to the original document.  If the state of the control has not been modified, calling this method
	 * has no effect at all.
	 *
	 * @param formControl  the form control to add.
	 * @see #add(FormFields)
	 */
	public void add(final FormControl formControl) {
		// The control itself decides which output segments (if any) to register.
		formControl.addToOutputDocument(this);
	}

	/**
	 * {@linkplain #add(FormControl) Adds} all the constituent {@linkplain FormControl form controls}
	 * from the specified {@link FormFields} to this <code>OutputDocument</code>.
	 * <p>
	 * This is equivalent to the following code:
	 * <pre>for (Iterator i=formFields.{@link FormFields#getFormControls() getFormControls()}.iterator(); i.hasNext();)
	 *   {@link #add(FormControl) add}((FormControl)i.next());</pre>
	 * <p>
	 * The state of any of the form controls in the specified form fields should not be modified after this method is called,
	 * as there is no guarantee that subsequent changes either will or will not be reflected in the final output.
	 * A second call to this method with the same parameter is not allowed.
	 * It is therefore recommended to call this method as the last action before the output is generated.
	 *
	 * @param formFields  the form fields to add.
	 * @see #add(FormControl)
	 */
	public void add(final FormFields formFields) {
		formFields.addToOutputDocument(this);
	}

	/**
	 * Outputs the final content of this <code>OutputDocument</code> to the specified <code>Writer</code>.
	 * <p>
	 * The registered output segments are sorted in place using {@link OutputSegment#COMPARATOR} and then
	 * interleaved with the untouched stretches of the source text that lie between them.
	 * <p>
	 * An {@link OverlappingOutputSegmentsException} is thrown if any of the output segments overlap.
	 * For efficiency reasons this condition is not caught when the offending output segment is {@linkplain #add(OutputSegment) added}.
	 * <p>
	 * <b>The writer is closed by this method</b> once all of the content has been written successfully,
	 * regardless of whether any output segments have been registered.
	 * If an exception is thrown the writer is <i>not</i> closed by this method and may already have received
	 * part of the output; the caller remains responsible for it in that case.
	 *
	 * @param writer  the destination <code>java.io.Writer</code> for the output.
	 * @throws IOException  if an I/O exception occurs.
	 * @throws OverlappingOutputSegmentsException  if any of the output segments overlap.
	 * @see #getReader()
	 */
	public void output(final Writer writer) throws IOException {
		if (outputSegments.isEmpty()) {
			// Nothing was substituted: the output is simply the source text.
			Util.appendTo(writer,sourceText);
		} else {
			// The overlap check in the loop relies on the comparator ordering the segments by begin position.
			Collections.sort(outputSegments,OutputSegment.COMPARATOR);
			int pos=0; // index in sourceText of the first character not yet accounted for
			OutputSegment lastOutputSegment=null;
			for (final Iterator i=outputSegments.iterator(); i.hasNext();) {
				final OutputSegment outputSegment=(OutputSegment)i.next();
				if (outputSegment==lastOutputSegment) continue; // silently ignore duplicate output segment
				if (outputSegment.getBegin()<pos) throw new OverlappingOutputSegmentsException(lastOutputSegment,outputSegment);
				// Copy the unmodified source text between the previous segment and this one.
				if (outputSegment.getBegin()>pos) Util.appendTo(writer,sourceText,pos,outputSegment.getBegin());
				outputSegment.output(writer);
				lastOutputSegment=outputSegment;
				pos=outputSegment.getEnd();
			}
			// Copy whatever source text remains after the last segment.
			if (pos<sourceText.length()) Util.appendTo(writer,sourceText,pos,sourceText.length());
		}
		// Close on every successful path. Previously the no-segments path returned without closing,
		// so callers relying on this method to close (and thereby flush) the writer could lose output.
		writer.close();
	}

	/**
	 * Returns the final content of this <code>OutputDocument</code> as a <code>String</code>.
	 * @return the final content of this <code>OutputDocument</code> as a <code>String</code>.
	 * @throws OverlappingOutputSegmentsException  if any of the output segments overlap.
	 * @see #output(Writer)
	 */
	public String toString() {
		// Initial capacity is a guess that the output is up to 50% larger than the source.
		final StringWriter writer=new StringWriter((int)(sourceText.length()*1.5));
		try {
			output(writer);
		} catch (IOException ex) {throw new RuntimeException(ex);} // should never happen with StringWriter
		return writer.toString();
	}
	
	/**
	 * Returns a <code>Reader</code> that reads the final content of this <code>OutputDocument</code>.
	 * <p>
	 * The current implementation of this method simply returns <code>new StringReader(</code>{@link #toString()}<code>)</code>,
	 * but a future version may implement this method in a more memory efficient manner.
	 *
	 * @return a <code>Reader</code> that reads the final content of this <code>OutputDocument</code>.
	 * @throws OverlappingOutputSegmentsException  if any of the output segments overlap.
	 * @see #output(Writer)
	 */
	public Reader getReader() {
		return new StringReader(toString());
	}
}
