Processing Ajax...

Title

Message

Confirm

Confirm

Confirm

Confirm

Are you sure you want to delete this item?

Confirm

Are you sure you want to delete this item?

Convert Text to HTML List (revised)

Description
Converts the copied text into an HTML list. This macro takes each line of the copied text, turns it into an HTML list item, and wraps the items in an HTML ordered or unordered list. This revised version indents with spaces instead of tabs, and supports detection of a few other list types.
Language
C#.net
Minimum Version
Created By
Jon Tackabury (BFS)
Contributors
-
Date Created
Aug 19, 2014
Date Last Modified
Aug 19, 2014

Macro Code

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

/// <summary>
/// ClipboardFusion macro that converts copied (typically multi-line) text into an HTML list.
/// </summary>
public static class ClipboardFusionHelper
{
    // Every <li> line is indented with spaces (not tabs).
    private const char LineIndentChar = ' ';
    private const int LineIndentSize = 2;

    // A run of line breaks together with the horizontal whitespace around it, including
    // whitespace-only lines. Only the first break of the run is captured so that the
    // input's line-break style is preserved. Line breaks are matched explicitly because
    // in .NET "$" (even with RegexOptions.Multiline) matches just before '\n', which
    // leaves the '\r' of a CRLF pair attached to the line.
    private const string LineBreakRunPattern = @"[ \t]*(?<linebreak>\r\n|\r|\n)[ \t\r\n]*";

    // Start of an ordered list: "0" or "1" followed by any non-word separator, or one of
    // the letters a/i/A/I followed by punctuation and then whitespace ("a) ", "I. ").
    // Letters need the punctuation because "A", "a" and "I" are also ordinary words, and
    // a sentence such as "I like apples" must not be mistaken for a roman-numeral list.
    private const string OrderedListStartPattern = @"^(?:[01][^\w]+|[aiAI][^\w\s]+[^\S\r\n])";

    // An ordered-list marker at the start of a line ("1. ", "b) ", "iv. ").
    private const string OrderedMarkerPattern = @"^[a-zA-Z0-9]+[^\w]+";

    // A bullet marker at the start of a line: one or more non-whitespace characters that
    // are not ASCII letters or digits (the letter "o" is allowed, since MS Word uses it
    // as a bullet), followed by a non-word separator. Whitespace is excluded from the
    // marker itself so that it cannot run on into the item text or across lines.
    private const string BulletMarkerPattern = @"^[^a-np-zA-NP-Z0-9\s]+[^\w]+";

    /// <summary>
    /// Converts a block of text into an HTML list with one <c>&lt;li&gt;</c> per
    /// non-blank line.
    /// </summary>
    /// <remarks>
    /// The list type is guessed from the first line. An ordered list is only detected
    /// when it starts at the beginning (a, i, A, I, 0 or 1); bulleted text (for example
    /// lists pasted from MS Word using •, o, §, #, *, -, -&gt;, =) becomes an unordered
    /// list, as does text with no recognisable markers. Detected markers are removed
    /// from every line. The detection is heuristic and not perfect.
    /// Line breaks between items keep the style of the input, while the wrapping list
    /// tags are always separated with CRLF. Item text is inserted verbatim; it is not
    /// HTML-encoded.
    /// </remarks>
    /// <param name="text">The copied text. May be null or blank.</param>
    /// <returns>
    /// The HTML list markup, or <paramref name="text"/> unchanged when it is null, empty
    /// or consists only of whitespace.
    /// </returns>
    public static string ProcessText(string text)
    {
        // Nothing to convert: hand the input back instead of throwing on null or
        // emitting a list with a single empty item.
        if (String.IsNullOrWhiteSpace(text))
        {
            return text;
        }

        string listType = "ul";           // default to an unordered list
        string listCss = "";
        string output = text.Trim();

        // collapse runs of spaces/tabs into a single space
        output = Regex.Replace(output, @"[ \t]+", " ");

        // trim every line and drop empty or whitespace-only lines in one pass
        output = Regex.Replace(output, LineBreakRunPattern, "${linebreak}");

        if (Regex.IsMatch(output, OrderedListStartPattern))
        {
            // it looks like we are dealing with some sort of ordered list
            listType = "ol";

            // work out what type of ordered list it is ("1" keeps the browser default)
            switch (output[0])
            {
                case 'a': listCss = "lower-alpha"; break;
                case 'i': listCss = "lower-roman"; break;
                case 'A': listCss = "upper-alpha"; break;
                case 'I': listCss = "upper-roman"; break;
                case '0': listCss = "decimal-leading-zero"; break;
            }

            // remove the list item indicators
            output = Regex.Replace(output, OrderedMarkerPattern, "", RegexOptions.Multiline);
        }
        else if (Regex.IsMatch(output, BulletMarkerPattern))
        {
            // it looks like we have some sort of bulleted unordered list
            // work out what type of unordered list it is (anything else keeps the default)
            switch (char.ToLowerInvariant(output[0]))
            {
                case 'o': listCss = "circle"; break;
                case '[': listCss = "square"; break;
                case '■': listCss = "square"; break;
                case '▪': listCss = "square"; break;
            }

            // remove the list item indicators
            output = Regex.Replace(output, BulletMarkerPattern, "", RegexOptions.Multiline);
        }

        string styleAttribute = listCss.Length > 0
            ? " style=\"list-style-type:" + listCss + ";\""
            : "";
        string lineIndent = new string(LineIndentChar, LineIndentSize);

        // turn every remaining line break into an item boundary
        output = Regex.Replace(output, @"(?<linebreak>\r\n|\r|\n)", "</li>${linebreak}" + lineIndent + "<li>");

        return String.Format(
            "<{0}{1}>\r\n{2}<li>{3}</li>\r\n</{0}>",
            listType,
            styleAttribute,
            lineIndent,
            output
            );
    }
}