Thursday, September 22, 2011

Remove HTML tags from a string using C#

/// <summary>
/// Helpers that strip HTML-style tags (anything between '&lt;' and '&gt;')
/// from a string. Three interchangeable implementations are provided so that
/// callers can pick the speed/convenience trade-off they prefer.
/// </summary>
/// <remarks>
/// These methods only delete the markup delimited by angle brackets. They do
/// not decode entities such as <c>&amp;amp;</c>, and they do not parse HTML,
/// so they must not be relied upon to sanitize untrusted input.
/// </remarks>
public static class RemoveHtmlFromString
{
    /// <summary>
    /// Lazy match of '&lt;', any characters, then the nearest '&gt;'.
    /// </summary>
    private const string TagPattern = "<.*?>";

    /// <summary>
    /// Singleline lets '.' match '\n' so a tag that is wrapped across several
    /// lines is still removed; this keeps the regex variants in agreement with
    /// <see cref="StripTagsCharArray"/>.
    /// </summary>
    private const RegexOptions TagOptions = RegexOptions.Singleline;

    /// <summary>
    /// Compiled regular expression, built once and reused for performance.
    /// </summary>
    private static readonly Regex _htmlRegex =
        new Regex(TagPattern, TagOptions | RegexOptions.Compiled);

    /// <summary>
    /// Remove HTML tags from a string with the static Regex API.
    /// </summary>
    /// <param name="source">Text that may contain tags. Must not be null.</param>
    /// <returns><paramref name="source"/> with every tag removed.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="source"/> is null.
    /// </exception>
    public static string StripTagsRegex(string source)
    {
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }

        return Regex.Replace(source, TagPattern, string.Empty, TagOptions);
    }

    /// <summary>
    /// Remove HTML tags from a string with the cached, compiled Regex.
    /// </summary>
    /// <param name="source">Text that may contain tags. Must not be null.</param>
    /// <returns><paramref name="source"/> with every tag removed.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="source"/> is null.
    /// </exception>
    public static string StripTagsRegexCompiled(string source)
    {
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }

        return _htmlRegex.Replace(source, string.Empty);
    }

    /// <summary>
    /// Remove HTML tags from a string with a single pass over its characters,
    /// copying everything that is outside a tag into a scratch buffer.
    /// </summary>
    /// <remarks>
    /// Unlike the regex variants, a '&gt;' is always dropped (even without a
    /// preceding '&lt;'), and an unclosed '&lt;' discards the rest of the input.
    /// </remarks>
    /// <param name="source">Text that may contain tags. Must not be null.</param>
    /// <returns><paramref name="source"/> with every tag removed.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="source"/> is null.
    /// </exception>
    public static string StripTagsCharArray(string source)
    {
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }

        // The output can never be longer than the input, so one allocation suffices.
        char[] buffer = new char[source.Length];
        int written = 0;
        bool insideTag = false;

        foreach (char current in source)
        {
            if (current == '<')
            {
                insideTag = true;
            }
            else if (current == '>')
            {
                insideTag = false;
            }
            else if (!insideTag)
            {
                buffer[written++] = current;
            }
        }

        // Only the first 'written' slots hold real output.
        return new string(buffer, 0, written);
    }
}

No comments:

Post a Comment

Thank you for commenting. We will reply soon...

Featured Posts

#Linux Commands Unveiled: #date, #uname, #hostname, #hostid, #arch, #nproc

 #Linux Commands Unveiled: #date, #uname, #hostname, #hostid, #arch, #nproc Linux is an open-source operating system that is loved by millio...