• 大小: 0.05M
    文件类型: .cs
    金币: 1
    下载: 0 次
    发布日期: 2021-02-26
  • 语言: C#
  • 标签: 压缩  HTML  

资源简介

一个用于网站页面输出HTML压缩的算法。


   

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
using System.Threading;
using System.Diagnostics;

namespace Inno.Infrastructure.Utilities
{
    /// <summary>
    /// A fast html compressor that can remove unnecessary whitespace and comments, and compress
    /// inline scripts or css styles found within html streams. 
    /// </summary>
    public class HtmlCompressor
    {
        private StreamReader _input;
        private StreamWriter _output;
        private Stream _outputStream;
        private long _originalContentLength = 0;
        private long _compressedContentLength = 0;

#if DEBUG
        private StringBuilder _debugOutput = new StringBuilder();
#endif

        private char _tok;

        private const int EOF = -1;

        /// <summary>
        /// Creates a compressor with neither an input nor an output attached.
        /// Calling <c>compress()</c> in that state throws
        /// <see cref="InvalidOperationException"/>; an input and an output must be
        /// available before compressing.
        /// </summary>
        public HtmlCompressor()
        {
        }

        /// <summary>
        /// Creates a compressor that reads html from <paramref name="input"/> and writes
        /// the compressed result to <paramref name="output"/>.
        /// </summary>
        /// <param name="input">Reader that supplies the html characters to compress.</param>
        /// <param name="output">Writer that receives the compressed html.</param>
        public HtmlCompressor(StreamReader input, StreamWriter output)
        {
            _output = output;
            _input = input;
        }

        /// <summary>
        /// Creates a compressor that writes to <paramref name="output"/>. No input is
        /// attached yet; supply one with <c>fill</c> or use <c>compress(string)</c>.
        /// </summary>
        /// <param name="output">Writer that receives the compressed html.</param>
        public HtmlCompressor(StreamWriter output)
        {
            _output = output;
        }

        /// <summary>
        /// Creates a compressor that writes encoded bytes to a raw stream. Characters are
        /// encoded with the <c>Encoding</c> property, or with the system default encoding
        /// when that property is null. No input is attached yet.
        /// </summary>
        /// <param name="outputStream">Stream that receives the compressed html bytes.</param>
        public HtmlCompressor(Stream outputStream)
        {
            _outputStream = outputStream;
        }

        /// <summary>
        /// Gets or sets the encoding the compressor uses. 
        /// </summary>
        public Encoding Encoding { get; set; }

        /// <summary>
        /// Gets or sets whether the compression ratio should be shown. 
        /// </summary>
        public bool enable_compress_ratio { get; set; }

        // tag types. 
        enum TagTypes { NONE, CDADA, DOCTYPE, TEXTAREA, PRE, STYLE, SCRIPT, OTHERS }

        #region state machine states

        // Type of the start tag most recently classified by compress(). It is assigned
        // (at least) when '>' directly terminates a tag name, and reset to NONE when a
        // whitespace-preserving block has been closed.
        private TagTypes _tagType = TagTypes.NONE;

        // states
        // _startlt is set when '<' is read outside comments, attribute values, CDATA and
        // whitespace-preserving content; _startltPos records the position of that '<'.
        private bool _startlt;
        private long _startltPos;

        private bool _startTagName;
        private bool _startTag;

        private bool _startAttr;
        private bool _startAttrName;
        private bool _endAttrName;
        private bool _startEq;
        private long _startEqPos = -1;
        private bool _startAttrValue;
        private bool _startAttrValueNoQuotes;
        private char _attrQuoteChar;

        // Set once "</" has been echoed for an ordinary end tag; cleared by the closing '>'.
        private bool _lookForEndTag;

        private bool _startWsEndTag;
        private StringBuilder _endTagNameBuf = new StringBuilder();
        private string _endTagName;
        private bool _blockEndTag;
        // Set when '/' is read inside a start tag: it may begin "/>", so the decision
        // is deferred until the next character is seen.
        private bool _lookAheadEndOfTagGt;

        private bool _startEscape;
        private long _lastEscapePos;
        private int _escapeCount;

        private bool _startPI; // start processing instruction <!
        // Set when '-' follows "<!"; cleared when '>' directly follows at least two hyphens.
        private bool _startComment;
        // Set when '[' follows "<!"; cleared at a '>' once at least one ']' has been counted.
        private bool _startCDATA;
        // Count of adjacent ']' characters seen inside CDATA, and the position of the last one.
        private int _cdataCloseBracketCount = 0;
        private long _cdataCloseBracketPos = -1;

        // Count of adjacent '-' characters seen inside a comment, and the position of the
        // last one; used to recognise the "-->" terminator.
        private int _endOfCommentHyphenCount;
        private long _lastHyphenPos;

        private StringBuilder _cdataTagName = new StringBuilder();

        // tag name buffer. 
        private StringBuilder _tagNameBuf = new StringBuilder(256);
        private string _tagName;

        // text between tags. 
        // _lookForNextTag is set after a tag has been closed and cleared when the
        // buffered text node is written out by echo_text_node_content.
        private bool _lookForNextTag = true;
        private StringBuilder _textNodeContent = new StringBuilder();

        // _startWsContent is set after a PRE, SCRIPT, STYLE or TEXTAREA start tag.
        private bool _startWsContent;
        // True only for SCRIPT and STYLE: while set, echo() appends to _wsContent
        // instead of writing to the output.
        private bool _delayWsContent; // if the content should be delayed. 
        private StringBuilder _wsContent = new StringBuilder();
        // Position of the last '<' seen inside whitespace-preserving content while not
        // inside a js string or js comment.
        private long _endOfTagLtPos;

        // simple state machine to bypass javascript/css comments and strings. 
        private bool _startJsComment;
        // Distinguishes "/* ... */" (true) from "//" (false) while _startJsComment is set.
        private bool _isJsBlockComment;
        private bool _startJsString;
        private char _jsStringQuoteChar;
        // Set on a first '/' inside script/style content; the next character decides
        // whether a comment starts.
        private bool _lookAheadStartJsComment;
        // Set on '*' inside a block comment; a '/' right after it ends the comment.
        private bool _lookAheadEndOfJsBlockComment;
        // Records whether a '/' immediately followed the '<' noted in _endOfTagLtPos.
        private bool _endOfTagSeen;
        private long _endOfJsBlockCommentLookAheadPos;

        #endregion

        #region helper methods

        // read next token from the stream and put the token in the _tok field. 
        long _pointer = 0;
        /// <summary>
        /// Reads the next character from the input reader into <c>_tok</c> and advances
        /// the position counter. When <c>enable_compress_ratio</c> is set, the encoded
        /// size of the character is added to the running original-content length.
        /// </summary>
        /// <returns><c>true</c> when a character was read; <c>false</c> at end of input.</returns>
        private bool next()
        {
            // Fix: the increment operator had been lost ("_pointer  ;" is not a valid
            // statement). The counter is bumped before the read, so pos(), which
            // returns _pointer - 1, identifies the character held in _tok.
            _pointer++;

            int b = _input.Read();
            if (b == EOF)
            {
                return false;
            }

            _tok = (char)b;

            if (this.enable_compress_ratio)
            {
                var encoding = this.Encoding ?? Encoding.Default;

                // Fix: accumulate with += ; a plain assignment kept only the size of the
                // last character, which made the reported ratio meaningless.
                _originalContentLength += encoding.GetByteCount(new char[] { _tok });
            }

            return true;
        }

        /// <summary>
        /// Reads characters until <c>_tok</c> holds a non-whitespace character or the
        /// input is exhausted. At least one character is always consumed.
        /// </summary>
        private void skip()
        {
            while (next())
            {
                if (!char.IsWhiteSpace(_tok))
                {
                    break;
                }
            }
        }

        // Token classification enum. Only StartTag is declared and nothing in the visible
        // part of this file refers to it.
        // NOTE(review): possibly unused; confirm against the rest of the file before removing.
        enum TokenState
        {
            StartTag,
        }

        /// <summary>
        /// Emits the character currently held in <c>_tok</c> through <c>echo(char)</c>.
        /// </summary>
        /// <returns>This instance, so calls can be chained.</returns>
        private HtmlCompressor echo()
        {
            char current = _tok;
            return echo(current);
        }

        /// <summary>
        /// Emits a single character. While <c>_delayWsContent</c> is set the character is
        /// buffered in <c>_wsContent</c>; otherwise it is written to the text writer if one
        /// is attached, else encoded and written to the raw output stream.
        /// </summary>
        /// <param name="ch">Character to emit.</param>
        /// <returns>This instance, so calls can be chained.</returns>
        private HtmlCompressor echo(char ch)
        {
            if (_delayWsContent)
            {
                // script/style content is collected first and compressed as a whole later.
                _wsContent.Append(ch);
                return this;
            }

            if (null != _output)
            {
                // NOTE(review): characters written through the text writer are not added
                // to _compressedContentLength; only the raw-stream path is counted.
                _output.Write(ch);
            }
            else if (null != _outputStream)
            {
                var encoding = this.Encoding ?? Encoding.Default;
                var bytes = encoding.GetBytes(new char[] { ch });
                _outputStream.Write(bytes, 0, bytes.Length);

                if (this.enable_compress_ratio)
                {
                    // Fix: accumulate with += ; a plain assignment kept only the size
                    // of the last write.
                    _compressedContentLength += bytes.Length;
                }
            }
#if DEBUG
            _debugOutput.Append(ch);
#endif
            return this;
        }

        /// <summary>
        /// Emits a string. While <c>_delayWsContent</c> is set the text is buffered in
        /// <c>_wsContent</c>; otherwise it is written to the text writer if one is
        /// attached, else encoded and written to the raw output stream.
        /// </summary>
        /// <param name="s">Text to emit.</param>
        /// <returns>This instance, so calls can be chained.</returns>
        private HtmlCompressor echo(string s)
        {
            if (_delayWsContent)
            {
                // script/style content is collected first and compressed as a whole later.
                _wsContent.Append(s);
                return this;
            }

            if (null != _output)
            {
                // NOTE(review): text written through the text writer is not added to
                // _compressedContentLength; only the raw-stream path is counted.
                _output.Write(s);
            }
            else if (null != _outputStream)
            {
                var encoding = this.Encoding ?? Encoding.Default;
                var bytes = encoding.GetBytes(s);
                _outputStream.Write(bytes, 0, bytes.Length);

                if (this.enable_compress_ratio)
                {
                    // Fix: accumulate with += ; a plain assignment kept only the size
                    // of the last write.
                    _compressedContentLength += bytes.Length;
                }
            }
#if DEBUG
            _debugOutput.Append(s);
#endif
            return this;
        }

        /// <summary>
        /// Returns the position counter minus one, i.e. the position associated with the
        /// character currently held in <c>_tok</c>.
        /// </summary>
        private long pos()
        {
            long current = _pointer - 1;
            return current;
        }

        /// <summary>
        /// Tells whether the current token is a whitespace character.
        /// </summary>
        private bool isws()
        {
            char current = _tok;
            return char.IsWhiteSpace(current);
        }

        /// <summary>
        /// Tells whether the current token is a carriage return or a line feed.
        /// </summary>
        private bool iscrlf()
        {
            switch (_tok)
            {
                case '\r':
                case '\n':
                    return true;
                default:
                    return false;
            }
        }

        /// <summary>
        /// Tells whether the parser is inside whitespace-preserving content whose output
        /// is being buffered (both <c>_startWsContent</c> and <c>_delayWsContent</c> set).
        /// </summary>
        private bool in_script_or_css()
        {
            if (!_startWsContent)
            {
                return false;
            }

            return _delayWsContent;
        }

        private static Regex _rExtraSpaces = new Regex("\\s{2,}", RegexOptions.Compiled | RegexOptions.IgnoreCase);
        /// <summary>
        /// Writes out the buffered text node without trimming spaces at its ends
        /// (line breaks and tabs at the ends are still removed).
        /// </summary>
        private void echo_text_node_content()
        {
            const bool trimBothEnds = false;
            echo_text_node_content(trimBothEnds);
        }
        /// <summary>
        /// Writes out the text collected between two tags and clears the buffer.
        /// Runs of two or more whitespace characters are collapsed to one space.
        /// A buffer holding only whitespace is reduced to a single space, and only when
        /// its first character is a space.
        /// </summary>
        /// <param name="trim">
        /// When true, whitespace is trimmed from both ends, but one leading space is kept
        /// if the collapsed text started with a space. When false, only carriage returns,
        /// line feeds and tabs are trimmed from the ends.
        /// </param>
        private void echo_text_node_content(bool trim)
        {
            bool hasContent = false;

            // Fix: the loop increment had been lost ("i  )" does not compile).
            for (var i = 0; i < _textNodeContent.Length; i++)
            {
                if (!char.IsWhiteSpace(_textNodeContent[i]))
                {
                    hasContent = true;
                    break;
                }
            }

            if (hasContent)
            {
                string text = _rExtraSpaces.Replace(_textNodeContent.ToString(), " ");

                if (trim)
                {
                    // Check before trimming: a leading space separates this text from the
                    // preceding element and must survive.
                    bool keepLeadingSpace = text.Length > 0 && text[0] == ' ';
                    text = text.Trim();

                    if (keepLeadingSpace)
                    {
                        // Fix: the concatenation operator had been lost here.
                        text = " " + text;
                    }
                }
                else
                {
                    // only trim new lines and tabs.
                    text = text.Trim('\r', '\n', '\t');
                }

                echo(text);
            }
            else if (_textNodeContent.Length > 0 && _textNodeContent[0] == ' ')
            {
                echo(' ');
            }

            _lookForNextTag = false;
            _textNodeContent.Remove(0, _textNodeContent.Length);
        }

        /// <summary>
        /// Minifies inline script text with <c>MyMin.parse</c>. This is best effort: if
        /// the minifier throws, the original text is returned unchanged.
        /// </summary>
        /// <param name="scripts">Script source to minify; may be null or empty.</param>
        /// <returns>
        /// An empty string for null or empty input, the minified script on success, or
        /// the original script when minification fails.
        /// </returns>
        private string compress_scripts(string scripts)
        {
            if (string.IsNullOrEmpty(scripts))
            {
                return string.Empty;
            }

            try
            {
                return MyMin.parse(scripts);
            }
            catch (Exception ex)
            {
                // Falling back keeps the page working; the failure is reported in debug
                // builds rather than being swallowed without a trace.
                Debug.WriteLine("HtmlCompressor: script minification failed, original script kept. " + ex);
                return scripts;
            }
        }

        /// <summary>
        /// Minifies inline css text with <c>MyMin.parse(styles, true, true)</c>. This is
        /// best effort: if the minifier throws, the original text is returned unchanged.
        /// </summary>
        /// <param name="styles">Css source to minify; may be null or empty.</param>
        /// <returns>
        /// An empty string for null or empty input, the minified css on success, or the
        /// original css when minification fails.
        /// </returns>
        public string compress_styles(string styles)
        {
            if (string.IsNullOrEmpty(styles))
            {
                return string.Empty;
            }

            try
            {
                return MyMin.parse(styles, true, true);
            }
            catch (Exception ex)
            {
                // Falling back keeps the page working; the failure is reported in debug
                // builds rather than being swallowed without a trace.
                Debug.WriteLine("HtmlCompressor: style minification failed, original styles kept. " + ex);
                return styles;
            }
        }

        #endregion

        /// <summary>
        /// Supplies the next input reader so that several segments of html can be
        /// compressed one after another. A complete document is not required: the
        /// compressor is a look-ahead state machine that decides what to do from the
        /// next character it reads, so segments can be handed over while they are still
        /// arriving from elsewhere (for example the network). This method only replaces
        /// the reader; call <c>compress()</c> afterwards to process it.
        /// </summary>
        /// <param name="input">Reader that supplies the next segment of html.</param>
        public void fill(StreamReader input)
        {
            _input = input;
        }

        /// <summary>
        /// Supplies the next input segment from a byte array. The bytes are decoded with
        /// the <c>Encoding</c> property, or with the system default encoding when that
        /// property is null.
        /// </summary>
        /// <param name="input">Buffer holding the encoded html.</param>
        /// <param name="offset">Index in <paramref name="input"/> at which the segment starts.</param>
        /// <param name="count">Number of bytes in the segment.</param>
        public void fill(byte[] input, int offset, int count)
        {
            var buffer = new MemoryStream(input, offset, count);
            Encoding encoding = this.Encoding ?? Encoding.Default;

            fill(new StreamReader(buffer, encoding));
        }

        /// <summary>
        /// Compresses an html string. The string is encoded as UTF-8, wrapped in a reader
        /// that becomes the current input, and then processed by <c>compress()</c>.
        /// </summary>
        /// <param name="htmlString">Html text to compress.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown by <c>compress()</c> when no output has been specified.
        /// </exception>
        public void compress(string htmlString)
        {
            byte[] utf8 = Encoding.UTF8.GetBytes(htmlString);
            _input = new StreamReader(new MemoryStream(utf8));

            // run the state machine over the new input.
            compress();
        }

        /// <summary>
        /// Finishes a compression run by writing out any text node that is still buffered.
        /// In DEBUG builds, when <c>enable_compress_ratio</c> is set and some input has
        /// been counted, a div element reporting the compression ratio is appended to the
        /// output as well. This method does not call Flush on the underlying writer or
        /// stream.
        /// </summary>
        public void flush()
        {
            // emit html text that is still waiting for a following tag.
            if (_lookForNextTag)
            {
                echo_text_node_content();
            }
#if DEBUG
            if (!this.enable_compress_ratio || _originalContentLength <= 0)
            {
                return;
            }

            double savedFraction = (_originalContentLength - _compressedContentLength) * 1.0 / _originalContentLength;
            string compress_ratio = string.Format(
                "<div class='html-compressor-compress-ratio'><i style='font-size:.7em;'>---- compress ratio: {0:F2}%. ----</i></div>",
                100 * savedFraction
                );

            if (null != _output)
            {
                _output.Write(compress_ratio);
            }
            else if (null != _outputStream)
            {
                var encoding = this.Encoding ?? Encoding.Default;
                var bytes = encoding.GetBytes(compress_ratio);
                _outputStream.Write(bytes, 0, bytes.Length);
            }
#endif
        }

        public bool isEndOfDocument { get; private set; }

        /// <summary>
        /// Begin compress the specified html stream which can be specified on the constructor or
        /// by calling the "fill" method. To compress a string, use compress(string htmlString). 
        /// </summary>
        public void compress()
        {
            if (null == this._input || (null == this._output && null == this._outputStream))
            {
                throw new InvalidOperationException("Input and output stream must both be specified. ");
            }

            while (next())
            {
                // skip white space. 
                switch (_tok)
                {
                    /* start tag quote */
                    case '<':
                        {
                            if (_lookAheadEndOfTagGt)
                            {
                                echo('/');
                                _lookAheadEndOfTagGt = false;
                            }

                            if (_startComment) continue;
                            else if (_startAttrValue || _startCDATA)
                            {
                                // ouput directly in attribute values and comments. 
                                echo();
                            }
                            else if (_startWsContent)
                            {
                                // because we cann't determine if the token comes
                                // from inside the script/style or indicates a
                                // end of block tag, we have to echo it anyway. 
                                echo();

                                if (!_startJsString && !_startJsComment)
                                {
                                    _endOfTagLtPos = pos();
                                }
                            }
                            else
                            {
                                _startlt = true;
                                _startltPos = pos();
                            }
                        }
                        break;

                    /* probably comment sign, i.e., <!-- */
                    case '!':
                        {
                            if (_startlt)
                            {
                                if (_startltPos   1 == pos())
                                {
                                    // start of processing instructions, e.g., 
                                    // <!--, <![CDATA, <!DOCTYPE, etc. 
                                    _startPI = true;
                                }
                                else
                                {
                                    echo();
                                }
                                _startlt = false;
                                _startltPos = -1;
                            }
                            else
                            {
                                // otherwise, treat it as a generic character. 
                                generic_handler();
                            }

                        }
                        break;

                    /* probably comment sign, i.e., <!-- */
                    case '-':
                        {
                            if (_startPI)
                            {
                                _startComment = true;
                                _startPI = false;

                                // reset the hyphen testing flags. 
                                _endOfCommentHyphenCount = 0;
                                _lastHyphenPos = -1;
                            }
                            else if (_startComment)
                            {
                                if (_lastHyphenPos   1 != pos())
                                {
                                    _endOfCommentHyphenCount = 0;
                                    _lastHyphenPos = -1;
                                }

                                // record for test of end of comment. 
                                _endOfCommentHyphenCount  ;
                                _lastHyphenPos = pos();
                            }
                            else
                            {
                                // call generic handler. 
                                generic_handler();
                            }
                        }
                        break;

                    /* probably start of CDATA <![ */
                    case '[':
                        {
                            if (_startPI)
                            {
                                if (_lookForNextTag)
                                {
                                    echo_text_node_content();
                                }

                                _startCDATA = true;
                                _startPI = false;
                                echo('<').echo('!').echo();

                                // clear the cdata tag name buffer. 
                                _cdataTagName.Remove(0, _cdataTagName.Length);
                            }
                            else if (_startCDATA)
                            {
                                echo();
                            }
                            else
                            {
                                // call generic handler. 
                                generic_handler();
                            }
                        }
                        break;

                    /* probably end of CDATA section */
                    case ']':
                        {
                            if (_startCDATA)
                            {
                                echo();

                                if (_cdataCloseBracketPos   1 != pos())
                                {
                                    _cdataCloseBracketPos = -1;
                                    _cdataCloseBracketCount = 0;
                                }

                                _cdataCloseBracketCount  ;
                                _cdataCloseBracketPos = pos();
                            }
                            else
                                // call generic handler. 
                                generic_handler();
                        }
                        break;

                    /* probably end of tag such as </a>, <br />, two forms */
                    case '/':
                        {
                            if (_lookAheadEndOfTagGt)
                            {
                                echo('/');
                                _lookAheadEndOfTagGt = false;
                            }

                            if (_startAttrValueNoQuotes)
                            {
                                echo();
                                continue;
                            }

                            if (_startWsContent)
                            {
                                // skip token in the scripts/css. 
                                if (_startJsString)
                                {
                                    echo();
                                }
                                else if (_startJsComment)
                                {
                                    echo();

                                    if (_lookAheadEndOfJsBlockComment)
                                    {
                                        if (_endOfJsBlockCommentLookAheadPos   1 == pos())
                                        {
                                            _lookAheadEndOfJsBlockComment = false;
                                            _startJsComment = false;
                                            _isJsBlockComment = false;
                                        }
                                    }
                                }
                                else if (_lookAheadStartJsComment)
                                {
                                    echo();

                                    // we found the inline comment. 
                                    _lookAheadStartJsComment = false;
                                    _startJsComment = true;
                                    _isJsBlockComment = false;
                                }
                                else
                                {
                                    // we need read on to determine what to do. 
                                    echo();
                                    _lookAheadStartJsComment = true;

                                    // this flag sets if we have seen the probable end of tag "</". 
                                    _endOfTagSeen = _endOfTagLtPos   1 == pos();
                                }
                            }
                            else if (_startAttr && !_startAttrValue || _startTagName)
                            {
                                _blockEndTag = false;
                                _startAttr = false;
                                _startTagName = false;
                                _startEq = false;
                                _startEqPos = -1;

                                if (_startTag || _startTagName)
                                    echo(' ');

                                echo();
                            }
                            else if (_startTag)
                            {
                                // might be end of tag sign '<img />', need look ahead. 
                                _lookAheadEndOfTagGt = true;
                            }
                            else if (_startlt)
                            {
                                if (_lookForNextTag)
                                {
                                    echo_text_node_content(true);
                                }

                                echo('<').echo();

                                _startlt = false;
                                _lookForEndTag = true;
                            }
                            else
                            {
                                // call generic handler. 
                                generic_handler();
                            }
                        }
                        break;

                    /* posible start or end of js block comment */
                    case '*':
                        {
                            if (!_startWsContent)
                            {
                                generic_handler();
                                continue;
                            }

                            if (_lookAheadStartJsComment)
                            {
                                // we detected a block js comment here. 
                                echo();

                                _startJsComment = true;
                                _isJsBlockComment = true;
                                _lookAheadStartJsComment = false;
                            }
                            else if (_startJsComment && _isJsBlockComment)
                            {
                                echo();

                                if (!_lookAheadEndOfJsBlockComment)
                                {
                                    _lookAheadEndOfJsBlockComment = true;
                                }
                                _endOfJsBlockCommentLookAheadPos = pos();
                            }
                            else
                            {
                                // call generic handler. 
                                generic_handler();
                            }
                        }
                        break;
                    /* end tag quote */
                    case '>':
                        {
                            if (_startComment)
                            {
                                if (_lastHyphenPos   1 == pos() && _endOfCommentHyphenCount >= 2)
                                {
                                    // end comment. 
                                    _startComment = false;
                                    continue;
                                }
                            }
                            else if (_startTag && !_startAttrValue || _startTagName || _startAttrValueNoQuotes || _lookAheadEndOfTagGt)
                            {
                                // should be <tag>
                                if (_startTagName)
                                {
                                    // remember the tag name and clear the tag name buffer. 
                                    _tagName = _tagNameBuf.ToString().ToUpper();
                                    _tagNameBuf.Remove(0, _tagNameBuf.Length);

                                    // determine the tag type, we should handle special 
                                    // tag names, such as !DOCTYPE, textarea, pre, style, script. 
                                    if (_tagName == "!DOCTYPE") _tagType = TagTypes.DOCTYPE;
                                    else if (_tagName == "TEXTAREA") _tagType = TagTypes.TEXTAREA;
                                    else if (_tagName == "PRE") _tagType = TagTypes.PRE;
                                    else if (_tagName == "STYLE") _tagType = TagTypes.STYLE;
                                    else if (_tagName == "SCRIPT") _tagType = TagTypes.SCRIPT;
                                    else _tagType = TagTypes.OTHERS;
                                }

                                // must be a tag close character, reset tag related states. 
                                _startTag = false;
                                _startTagName = false;
                                _startAttr = false;
                                _startAttrName = false;
                                _startAttrValue = false;
                                _startEq = false;
                                _startAttrValueNoQuotes = false;

                                _endOfTagLtPos = -1;

                                if (_lookAheadEndOfTagGt)
                                {
                                    _lookAheadEndOfTagGt = false;

                                    echo(' ').echo('/').echo();

                                    _lookForNextTag = true;
                                }
                                else
                                {
                                    echo();

                                    // the text inside the following tags should be preserved, and 
                                    // CSS and scripts code should be compressed as well. 
                                    if (_tagType == TagTypes.PRE || _tagType == TagTypes.SCRIPT ||
                                        _tagType == TagTypes.STYLE || _tagType == TagTypes.TEXTAREA)
                                    {
                                        _startWsContent = true;

                                        _wsContent.Remove(0, _wsContent.Length);
                                        _delayWsContent = (_tagType == TagTypes.SCRIPT
                                            || _tagType == TagTypes.STYLE);

                                        _tagNameBuf.Remove(0, _tagNameBuf.Length);
                                        // _tagType = TagTypes.NONE;
                                    }
                                    else
                                    {
                                        // _tagType = TagTypes.NONE;
                                        _tagNameBuf.Remove(0, _tagNameBuf.Length);
                                        _lookForNextTag = true;
                                    }
                                }
                                continue;
                            }
                            else if (_lookForEndTag)
                            {
                                echo();
                                _lookForEndTag = false;
                                _lookForNextTag = true;
                                continue;
                            }
                            else if (_startWsEndTag)
                            {
                                echo();

                                // close an "end-of-tag" tag, we test the tag name here. 
                                // if it is css or script, we get a chance to compress them. 
                                if (_blockEndTag)
                                {
                                    string endTagName = _endTagNameBuf.ToString();

                                    // test if end of whitespace tag. 

                                    bool isScript = string.Compare(endTagName, "script", StringComparison.OrdinalIgnoreCase) == 0;
                                    bool isStyle = string.Compare(endTagName, "style", StringComparison.OrdinalIgnoreCase) == 0;

                                    if (!(_tagType == TagTypes.TEXTAREA && 0 == string.Compare(endTagName, "textarea", StringComparison.OrdinalIgnoreCase) ||
                                        _tagType == TagTypes.PRE && 0 == string.Compare(endTagName, "pre", StringComparison.OrdinalIgnoreCase) ||
                                        isScript || isStyle))
                                    {
                                        _startWsEndTag = false;
                                        _blockEndTag = false;
                                        _endTagNameBuf.Remove(0, _endTagNameBuf.Length);
                                        continue;
                                    }

                                    if (isScript || isStyle)
                                    {
                                        string wsContent = _wsContent.ToString();
                                        int index = wsContent.LastIndexOf("</"   endTagName   ">", StringComparison.OrdinalIgnoreCase);
                                        wsContent = wsContent.Substring(0, index).Trim();

                                        // do compression. 
                                        if (isScript)
                                        {
                                            wsContent = compress_scripts(wsContent);
                                        }
                                        else
                                        {
                                            wsContent = compress_styles(wsContent);
                                        }

                                        _delayWsContent = false;

                                        if (wsContent.Length > 0)
                                        {
                                            echo("/*<![CDATA[*/"); // make it XHTML compatible
                                            echo(wsContent);
                                            echo("/*]]>*/");
                                        }

                                        echo("</"   endTagName   ">");
                                    }

                                    _wsContent.Remove(0, _wsContent.Length);
                                    _endTagNameBuf.Remove(0, _endTagNameBuf.Length);
                                    _endTagName = string.Empty;
                                }

                                _startWsContent = false;
                                _blockEndTag = false;
                                _startWsEndTag = false;
                                _tagType = TagTypes.NONE; // reset the preserve whitespace tag type. 

                                continue;
                            }
                            else if (_startlt)
                            {
                                _startlt = false;
                                _startltPos = -1;
                                echo();
                                continue;
                            }
                            else if (_startCDATA)
                            {
                                echo();

                                if (_cdataCloseBracketCount >= 1)
                                {
                                    // end of a CDATA section, reset the CDATA states. 
                                    _startCDATA = false;
                                }

                                continue;
                            }
                            // call generic handler. 
                            generic_handler();
                        }
                        break;

                    /* eq sign for attributes */
                    case '=':
                        {
                            if (_startAttrName || _endAttrName)
                            {
                                echo();
                                _startAttrName = false;
                                _endAttrName = false;
                                _startEq = true;
                                _startEqPos = pos();
                                continue;
                            }

                            // call generic handler. 
                            generic_handler();
                        }
                        break;

                    /* quotes */
                    case '\'':
                    case '"':
                        {
                            if (_lookAheadEndOfTagGt)
                            {
                                echo('/');
                                _lookAheadEndOfTagGt = false;
                            }

                            if (_startTag)
                            {
                                if (_startAttrValue)
                                {
                                    // end of attribute value. 
                                    if (_tok == _attrQuoteChar)
                                    {
                                        echo();

                                        _startAttrValue = false;
                                        _startAttr = false;
                                    }
                                    else
                                    {
                                        echo();
                                    }
                                }
                                else
                                {
                                    // a special attribute w/out attribute name. 
                                    if (!_startEq)
                                    {
                                        echo(' ');
                                        _startAttr = true;
                                    }

                                    _startAttrName = false;
                                    _endAttrName = false;
                                    _startEq = false;
                                    _startEqPos = -1;

                                    _startAttrValue = true;
                                    _attrQuoteChar = _tok;

                                    echo();
                                }
                            }
                            else if (in_script_or_css())
                            {
                                echo();

                                if (_startJsString)
                                {
                                    if (_startEscape && pos() != _lastEscapePos   1)
                                    {
                                        _startEscape = false;
                                    }

                                    if (_tok == _jsStringQuoteChar)
                                    {
                                        // determine if this the end of the string. 
                                        if (!_startEscape || _escapeCount % 2 == 0)
                                        {
                                            _startJsString = false;
                                        }
                                    }
                                }
                                else
                                {
                                    _startJsString = true;
                                    _jsStringQuoteChar = _tok;
                                }
                            }
                            else // call generic handler. 
                                generic_handler();
                        }
                        break;

                    /* escape character in quotes */
                    case '\\':
                        if (_startJsString)
                        {
                            if (_startEscape && pos() != _lastEscapePos   1)
                            {
                                _startEscape = false;
                            }

                            if (!_startEscape)
                            {
                                _startEscape = true;
                                _escapeCount = 1;
                            }
                            else
                            {
                                _escapeCount  ;
                            }

                            _lastEscapePos = pos();

                            echo();
                        }
                        else
                        {
                            // call generic handler. 
                            generic_handler();
                        }

                        break;

                    /* whitespace */
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                        {
                            if (_startlt) continue; // skip. 
                            else if (_startTagName)
                            {
                                _startTagName = false;
                                _startTag = true; // tag name found, start tag. 

                                // remember the tag name and clear the tag name buffer. 
                                _tagName = _tagNameBuf.ToString().ToUpper();
                                _tagNameBuf.Remove(0, _tagNameBuf.Length);

                                // determine the tag type, we should handle special 
                                // tag names, such as !DOCTYPE, textarea, pre, style, script. 
                                if (_tagName == "!DOCTYPE") _tagType = TagTypes.DOCTYPE;
                                else if (_tagName == "TEXTAREA") _tagType = TagTypes.TEXTAREA;
                                else if (_tagName == "PRE") _tagType = TagTypes.PRE;
                                else if (_tagName == "STYLE") _tagType = TagTypes.STYLE;
                                else if (_tagName == "SCRIPT") _tagType = TagTypes.SCRIPT;
                                else _tagType = TagTypes.OTHERS;

                                // reset the quote char. 
                                _attrQuoteChar = ' ';
                            }
                            else if (_startTag) // we're inside a tag. 
                            {
                                if (!_startAttr) continue; // skip. 
                                else // the attribute has started. 
                                {
                                    // see if we're inside an attribute name. 
                                    if (_startAttrName)
                                    {
                                        _startAttrName = false;
                                        _endAttrName = true;
                                    }
                                    else if (_startAttrValue)
                                    {
                                        // skip '\r' and '\n' in attribute values. 
                                        if (iscrlf()) continue;

                                        // preserve none crlf whitespaces. 
                                        echo();
                                    }
                                    else if (_startAttrValueNoQuotes)
                                    {
                                        _startAttrValueNoQuotes = false;
                                        _startAttr = false;
                                    }
                                }
                            }
                            else if (_startCDATA)
                            {
                                echo();
                            }
                            else if (_startWsContent)
                            {
                                echo();
                                if (_lookAheadStartJsComment)
                                {
                                    _lookAheadStartJsComment = false;
                                }
                                else if (_startJsComment && !_isJsBlockComment && iscrlf())
                                {
                                    _startJsComment = false;
                                    _isJsBlockComment = false;
                                }
                            }
                            else if (_lookForNextTag) // othercase, we collect the text to a temporary buffer. 
                            {
                                _textNodeContent.Append(_tok);
                            }
                        }
                        break;

                    default:
                        {
                            generic_handler();
                        }
                        break;
                }
            }
        }

        /// <summary>
        /// Fallback handler for the current token (<c>_tok</c>), called when the character-specific
        /// cases of the main loop do not consume it. The parser state flags are tested in a fixed
        /// priority order and only the first matching state acts on the token:
        /// pending '&lt;', tag name, inside a tag, CDATA, pending "&lt;!", whitespace-preserving
        /// content, comment, text node, and finally plain pass-through.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>echo()</c> with no argument presumably emits the current token and
        /// <c>pos()</c> presumably returns the current input position; both are defined elsewhere
        /// in this class - confirm against their implementations.
        /// </remarks>
        private void generic_handler()
        {
            // A '/' is pending output (presumably held back while waiting to see whether it is
            // followed by '>'); the current token is something else, so emit the slash now.
            if (_lookAheadEndOfTagGt)
            {
                echo('/');
                _lookAheadEndOfTagGt = false;
            }

            if (_startlt)
            {
                // First character after a '<': this begins a tag name.
                _startlt = false;
                _startltPos = -1;
                _startTagName = true; // look for tag name. 

                // append pending text node text before the tag is written. 
                if (_lookForNextTag)
                {
                    echo_text_node_content();
                }

                _tagNameBuf.Remove(0, _tagNameBuf.Length);

                // The '<' itself was not emitted when it was seen; write it now, then the token.
                echo('<');
                _tagNameBuf.Append(_tok);
                echo();
            }
            else if (_startTagName)
            {
                // Still collecting the tag name.
                _tagNameBuf.Append(_tok);
                echo();
            }
            else if (_startTag)
            {
                if (!_startAttr)
                {
                    // First character of a new attribute name.
                    echo(' '); // add a space between attributes (or attribute and tag)
                    _startAttr = true;

                    echo();
                    _startAttrName = true;
                }
                else // inside an attribute. 
                {
                    if (_startAttrName)
                    {
                        // Continuing the attribute name.
                        echo();
                    }
                    else if (_endAttrName)
                    {
                        // The previous attribute name ended without a value
                        // (such as <input checked ... >), so this token starts the next attribute.
                        echo(' '); // add a space between attributes (or attribute and tag)
                        _startAttr = true;

                        echo();
                        _startAttrName = true;

                        _endAttrName = false;
                    }
                    else if (_startAttrValue)
                    {
                        // Inside a quoted attribute value: pass through.
                        echo();
                    }
                    else if (_startEq && (_startEqPos + 1) == pos())
                    {
                        // The token immediately follows '=' (position recorded in _startEqPos),
                        // so this is an attribute value without quotes. 
                        _startAttrValueNoQuotes = true;
                        _startEq = false;
                        _startEqPos = -1;

                        echo();
                    }
                    else if (_startAttrValueNoQuotes)
                    {
                        // Continuing an unquoted attribute value.
                        echo();
                    }
                }
            }
            else if (_startCDATA)
            {
                echo();
            }
            else if (_startPI)
            {
                // Token following a pending "<!": treat "!<tok>..." as a tag name.
                _startPI = false;
                _startlt = false;
                _startltPos = -1;
                _startTagName = true; // look for tag name. 

                if (_lookForNextTag)
                {
                    echo_text_node_content();
                }

                // NOTE(review): unlike the '<' branch above, the tag name buffer is not cleared
                // here before appending - confirm it is always empty at this point.
                _tagNameBuf.Append('!').Append(_tok);
                echo('<').echo('!').echo();
            }
            else if (_startWsContent)
            {
                // Inside whitespace-preserving content.
                echo();

                if (_lookAheadStartJsComment)
                {
                    _lookAheadStartJsComment = false;

                    // NOTE(review): presumably a "</" was just seen, so the '/' was an end-tag
                    // slash rather than the start of a JS comment - confirm with the '/' handler.
                    if (_endOfTagSeen)
                    {
                        _endOfTagSeen = false;

                        _startWsEndTag = true;
                        _blockEndTag = true;

                        // begin collect the end tag name. 
                        _endTagNameBuf.Remove(0, _endTagNameBuf.Length);
                        _endTagName = string.Empty;
                    }
                }

                // Also runs for the token that just switched _startWsEndTag on, so that token
                // becomes the first character of the end tag name.
                if (_startWsEndTag)
                {
                    _endTagNameBuf.Append(_tok);
                }
            }
            else if (_startComment)
            {
                /* noop; nothing is emitted for this token. */
            }
            else if (_lookForNextTag)
            {
                // Text between tags is buffered rather than written immediately.
                _textNodeContent.Append(_tok);
            }
            else
            {
                // Pass-through. The original tested _lookForEndTag separately here, but both
                // arms only called echo(), so they are merged.
                echo();
            }
        }
    }
}

资源截图

代码片段和文件信息

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.Regularexpressions;
using System.Threading;
using System.Diagnostics;

namespace Inno.Infrastructure.Utilities
{
    /// <summary>
    /// A fast HTML compressor tool that can remove unnecessary whitespace and comments, or compress
    /// the inline scripts or CSS styles from within HTML streams.
    /// </summary>

    public class HtmlCompressor
    {
        private StreamReader _input;
        private StreamWriter _output;
        private Stream _outputStream;
        private long _originalContentLength = 0;
        private long _compressedContentLength = 0;

#if DEBUG
        private StringBuilder _debugOutput = new StringBuilder();
#endif

        private char _tok;

        private const int EOF = -1;

        /// 

        /// Create a default html compressor w/out input and output stream specified. 
        /// However

评论

共有 条评论