ASP.NET过滤HTML标签只保留换行与空格的方法
本文实例讲述了ASP.NET过滤HTML标签只保留换行与空格的方法。分享给大家供大家参考。具体分析如下:
自己从网上找了一个过滤HTML标签的方法,我也不知道谁的才是原创的,反正很多都一样。我把那方法复制下来,代码如下:
/// <summary>
/// Strips HTML markup from a fragment and returns the remaining text, HTML-encoded.
/// </summary>
/// <param name="Htmlstring">HTML source to clean; may be null or empty.</param>
/// <returns>
/// The input with script blocks, tags and comment remnants removed, a fixed set of
/// character entities decoded, any leftover angle brackets dropped, and the result
/// HTML-encoded and trimmed. Returns an empty string for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    // Nothing to clean; also avoids the ArgumentNullException Regex.Replace throws on null.
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks together with their content. Singleline lets '.' cross
    // line breaks, so multi-line scripts are removed instead of leaking as text.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"<script[^>]*?>.*?</script>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove comment delimiters the tag pattern did not consume.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode entities in ONE pass so text produced by decoding is never decoded
    // again (sequential passes turned "&amp;lt;" into "<").
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        DecodeHtmlEntity,
        RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result must be assigned to take effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // HttpUtility.HtmlEncode does not depend on HttpContext.Current, so the method
    // also works outside of a web request.
    return HttpUtility.HtmlEncode(Htmlstring).Trim();
}

// Maps one matched entity (capture group 1 holds its name or "#<digits>") to its replacement text.
private static string DecodeHtmlEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            // Any other numeric character reference is dropped.
            return string.Empty;
    }
}