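SSIS Script Component code that strips the metadata surrounding the HTML in RFC822 output, keeping only the HTML document. The script reads the input column “description”. Before pasting this into the script, add an output column named “html” of type DT_NTEXT to the Script Component.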
/// <summary>
/// Extracts the HTML document embedded in the row's <c>description</c> blob
/// and writes it to the <c>html</c> output column.
/// </summary>
/// <param name="Row">The input buffer row currently being processed.</param>
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
    var blobColumn = Row.description;
    if (blobColumn.IsNull)
    {
        // Nothing to extract; the html output is left unset for this row.
        return;
    }

    var stringData = BlobColumnToString(blobColumn);

    // Case-insensitive. The lookahead anchors the match at the first "<html",
    // and the greedy [\S\s]+ extends it to the last "</html>" in the text,
    // so anything before or after the document is dropped.
    Regex regex = new Regex(@"(?i)(?=<html)[\S\s]+(?:<\/html>)");
    Match match = regex.Match(stringData);
    if (match.Success)
    {
        // NOTE(review): the body of this branch was missing from the copy of the
        // script under review. It is reconstructed here from the accompanying
        // description (an "html" output of type DT_NTEXT) and the two helpers
        // that are otherwise unused - confirm against the original, in particular
        // whether CleanInput should be applied before writing.
        Row.html.AddBlobData(GetBytes(CleanInput(match.Value)));
    }
}
/// <summary>
/// Encodes a string as UTF-16 little-endian bytes (two bytes per char, no BOM),
/// the same encoding BlobColumnToString decodes with.
/// </summary>
/// <param name="str">The text to encode.</param>
/// <returns>The encoded bytes; an empty array for an empty string.</returns>
byte[] GetBytes(string str)
{
    // Encoding.Unicode is the counterpart of the Encoding.Unicode.GetString call
    // used when reading the blob, and replaces the manual Buffer.BlockCopy.
    return System.Text.Encoding.Unicode.GetBytes(str);
}
/// <summary>
/// Reads the entire contents of a blob column and decodes it as UTF-16 text.
/// </summary>
/// <param name="blobColumn">The blob column to read.</param>
/// <returns>
/// The decoded text, or <see cref="string.Empty"/> when the column reference
/// is null or the column value is NULL.
/// </returns>
string BlobColumnToString(BlobColumn blobColumn)
{
    if (blobColumn == null || blobColumn.IsNull)
    {
        return string.Empty;
    }

    // Convert.ToInt32 throws OverflowException rather than silently truncating
    // if the blob length does not fit in an Int32.
    var blobLength = Convert.ToInt32(blobColumn.Length);
    var blobData = blobColumn.GetBlobData(0, blobLength);

    // NOTE(review): decoding with Encoding.Unicode assumes the column holds
    // UTF-16 data (e.g. DT_NTEXT) - confirm the input column's type.
    return System.Text.Encoding.Unicode.GetString(blobData);
}
/// <summary>
/// Removes every line feed (\n) and tab (\t) character from the input.
/// Carriage returns (\r) and all other characters are left in place.
/// </summary>
/// <param name="strIn">The text to clean.</param>
/// <returns>
/// The text with line feeds and tabs removed, or the unmodified input
/// if the replacement does not finish within 1.5 seconds.
/// </returns>
static string CleanInput(string strIn)
{
    try
    {
        // Strip runs of line feeds and tabs.
        return Regex.Replace(strIn, @"[\n\t]+", "",
                             RegexOptions.None, TimeSpan.FromSeconds(1.5));
    }
    // If the replacement times out, fall back to the original text
    // rather than losing the row's content.
    catch (RegexMatchTimeoutException)
    {
        return strIn;
    }
}
No comment yet, add your voice below!