从 Word 中复制内容到编辑器时,往往会带入大量冗余的 HTML 代码,这是最常见的问题之一。下面介绍清理这些 HTML 格式的其中一种方法。
JavaScript 函数如下:
/**
 * Clean up HTML pasted from Microsoft Word before it is stored in an editor.
 *
 * The rules run strictly in the order listed; each one is a global,
 * case-insensitive regular-expression replacement applied to the result of
 * the previous one. In summary the function:
 *   - drops Word-only tags (`<o:p>`, `<?xml ...>`, any `<ns:tag>`),
 *   - removes `class`, `style`, `lang` and `face` attributes while keeping the
 *     other attributes of the tag,
 *   - unwraps attribute-less `<span>` / `<font>` elements,
 *   - turns `<h1>`..`<h6>` into `<div><b><font size="6..1">` and `<p>` into
 *     `<div>`, emitting the matching closing tags,
 *   - removes elements that contain only whitespace.
 *
 * This is a regex-based clean-up, not an HTML parser or a sanitizer: it does
 * not make untrusted markup safe to insert into a page.
 *
 * @param {string} html - Markup to clean. `null`/`undefined` yields `''`;
 *   any other non-string value is converted with `String()`.
 * @returns {string} The cleaned markup.
 */
function cWord(html) {
  if (html == null) {
    return '';
  }

  // An element with no content, or only whitespace. The optional attribute
  // part must start with whitespace so the tag name cannot be shortened by
  // backtracking (otherwise `<br></b>` would be treated as an empty `<b>`).
  const emptyElement = /<([^\s>]+)(?:\s[^>]*)?>\s*<\/\1>/gi;

  // [pattern, replacement] pairs. `[\s\S]` is used instead of `.` wherever
  // element content is matched so that content containing line breaks is
  // still recognised.
  const rules = [
    // Word's <o:p> paragraph markers.
    [/<o:p>\s*<\/o:p>/gi, ''],
    [/<o:p>[\s\S]*?<\/o:p>/gi, ' '],

    // Word-specific declarations inside style attributes.
    [/\s*mso-[^:]+:[^;"]+;?/gi, ''],
    [/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, ''],
    [/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, '"'],
    [/\s*TEXT-INDENT: 0cm\s*;/gi, ''],
    [/\s*TEXT-INDENT: 0cm\s*"/gi, '"'],
    [/\s*TEXT-ALIGN: [^\s;]+;?"/gi, '"'],
    [/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, '"'],
    [/\s*FONT-VARIANT: [^\s;]+;?"/gi, '"'],
    [/\s*tab-stops:[^;"]*;?/gi, ''],
    [/\s*tab-stops:[^"]*/gi, ''],

    // Font face attributes and font-family declarations.
    [/\s*face="[^"]*"/gi, ''],
    [/\s*face=[^ >]*/gi, ''],
    [/\s*FONT-FAMILY:[^;"]*;?/gi, ''],

    // Remove class / style attributes. The rest of the tag is re-emitted so
    // that unrelated attributes (href, align, ...) survive; a quoted class
    // value may contain spaces.
    [/<(\w[^>]*) class=(?:"[^"]*"|'[^']*'|[^ |>]*)([^>]*)/gi, '<$1$2'],
    [/<(\w[^>]*) style="[^"]*"([^>]*)/gi, '<$1$2'],
    [/\s*style="\s*"/gi, ''],

    // Spans holding a single (possibly non-breaking) space collapse to that
    // space; spans with no content disappear.
    [/<SPAN\s*[^>]*>\s*(&nbsp;|\u00a0| )\s*<\/SPAN>/gi, '$1'],
    [/<SPAN\s*[^>]*><\/SPAN>/gi, ''],

    // Remove lang attributes, keeping the rest of the tag.
    [/<(\w[^>]*) lang=(?:"[^"]*"|'[^']*'|[^ |>]*)([^>]*)/gi, '<$1$2'],

    // Unwrap <span> / <font> elements that no longer carry attributes.
    [/<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, '$1'],
    [/<FONT\s*>([\s\S]*?)<\/FONT>/gi, '$1'],

    // XML declarations and namespaced tags such as <st1:place>.
    [/<\\?\?xml[^>]*>/gi, ''],
    [/<\/?\w+:[^>]*>/gi, ''],

    // Headings: drop empty ones, then map <hN> to <div><b><font size=7-N>.
    [/<H\d>\s*<\/H\d>/gi, ''],
    [
      /<H([1-6])([^>]*)>/gi,
      (match, level, attrs) =>
        `<div${attrs}><b><font size="${7 - Number(level)}">`,
    ],
    // Close everything the heading rule opened, including the <div>.
    [/<\/H\d>/gi, '</font></b></div>'],

    [/<(U|I|STRIKE)>(&nbsp;|\u00a0| )<\/\1>/gi, '$2'],

    // Three passes, because removing an inner empty element can leave its
    // parent empty.
    [emptyElement, ''],
    [emptyElement, ''],
    [emptyElement, ''],

    // <p ...>...</p> -> <div ...>...</div>. Requiring whitespace or '>'
    // right after "<p" keeps <pre>, <param>, ... from being rewritten.
    [/<P((?:\s[^>]*)?>[\s\S]*?)<\/P>/gi, '<div$1</div>'],
  ];

  let cleaned = String(html);
  for (const [pattern, replacement] of rules) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned;
}
// This approach is deliberately simple; add or remove rules to suit your own needs.