从别的网站复制文章的时候，要把图片下载到我们网站，再把图片地址换成我们网站的地址。
<!-- Sample of a rewritten image tag: the id and the second NextPic argument are the image file name,
     the first NextPic argument is the site-relative path under /attached/image/.
     NOTE(review): src here already shows the final local URL rather than the "rcwimg" placeholder,
     so this looks like the DOM state after the onerror handler has run - confirm. -->
<img id="mbkenHUwhWeOj9U8K6c8LlAXaes3oXit-M4SnmRvB4wXXEue2ol7MviqfDlnDFgp.jpg" alt="图片" onerror="NextPic('/attached/image/mbkenHUwhWeOj9U8K6c8LlAXaes3oXit-M4SnmRvB4wXXEue2ol7MviqfDlnDFgp.jpg','mbkenHUwhWeOj9U8K6c8LlAXaes3oXit-M4SnmRvB4wXXEue2ol7MviqfDlnDFgp.jpg');" src="http://localhost:8080/attached/image/mbkenHUwhWeOj9U8K6c8LlAXaes3oXit-M4SnmRvB4wXXEue2ol7MviqfDlnDFgp.jpg">
<script type="text/javascript">
    /**
     * onerror handler for rewritten <img> tags.
     *
     * If the failing image's current src still contains the "rcwimg" placeholder,
     * point it at the locally stored copy. Once the src no longer contains the
     * placeholder the handler does nothing, so a failing local copy cannot loop.
     *
     * @param {string} url  site-relative path of the local copy, e.g. "/attached/image/x.jpg"
     * @param {string} name id of the <img> element (the image file name)
     */
    window.NextPic = function (url, name) {
        var img = document.getElementById(name);
        // The element may be missing (e.g. duplicate or altered ids); nothing to fix then.
        if (!img) {
            return;
        }
        // Only the placeholder src is swapped; anything else has already been handled.
        if (img.src.indexOf("rcwimg") === -1) {
            return;
        }
        // NOTE(review): the host is hard-coded to the local development server - confirm for deployment.
        img.src = "http://localhost:8080" + url;
    };
</script>
/**
 * Value object for a news article, extending {@code Page<WaNewsInfo>}.
 *
 * <p>Only the fields are declared in this snippet; accessors such as
 * {@code getContent()} / {@code setContent()} are used elsewhere, so they are
 * presumably generated or omitted here - TODO confirm.
 */
public class WaNewsInfoVo extends Page<WaNewsInfo> {

    /** Article id. */
    private Integer id;

    /** Article title. */
    private String title;

    /** Id and name of the first type (presumably the primary category - verify). */
    private Integer typeId;
    private String typeName;

    /** Id and name of the second type (presumably a sub-category - verify). */
    private Integer typeId2;
    private String typeName2;

    /** Logo of the article (presumably an image path - verify). */
    private String logo;

    /** Article body as HTML. */
    private String content;

    /** Creation time, kept as a string. */
    private String createTime;

    /** Creating user and that user's display name. */
    private String createUser;
    private String createUserName;

    /** Two free-form tags. */
    private String tag1;
    private String tag2;

    /** Display flag (value semantics not visible here). */
    private Integer isDisplay;

    /** Browse counter (presumably page views - verify). */
    private Integer browser;

    /** Short summary of the article. */
    private String summary;
}
// Character encoding used when decoding fetched HTML.
private static final String ECODING = "UTF-8";
// Matches ONE complete <img ...> tag that has a src attribute.
// [^>] keeps the match inside a single tag (and, unlike '.', also spans line breaks),
// so several images on the same line are returned as separate matches instead of one
// match running from the first "<img" to the last "src=". Case-insensitive.
private static final String IMGURL_REG = "(?i)<img\\b[^>]*?\\bsrc\\s*=[^>]*>";
// Matches an http/https URL inside a tag. The match INCLUDES the trailing delimiter
// (a double quote, '>' or whitespace), which the consumer has to strip.
private static final String IMGSRC_REG = "(http|https):\"?(.*?)(\"|>|\\s+)";
//添加或者修改资讯 public int insertOrUpdate(HttpServletRequest request, WaNewsInfoVo vo){ String aaa = "/attached/image/"; String path = request.getSession().getServletContext().getRealPath("../")+ "/attached/image/"; //获取图片标签 List<String> imgUrl = getImageUrl(vo.getContent()); //获取图片src地址 List<String> imgSrc = getImageSrc(imgUrl); //下载图片 Download(imgSrc,path); String test = replaceHtmlTag(vo.getContent(),"img","src","src=\""+aaa,"\""); vo.setContent(test); if(CommUtil.isEmpty(vo.getId())){ vo.setBrowser(0); return waNewsDao.insert(vo); } return waNewsDao.update(vo); }
/**
 * Collects every substring of the given HTML that matches {@code IMGURL_REG}.
 *
 * @param HTML markup to scan; may be {@code null}
 * @return the matched tag strings in document order; empty when {@code HTML}
 *         is {@code null} or contains no match
 */
private List<String> getImageUrl(String HTML) {
    List<String> listImgUrl = new ArrayList<String>();
    if (HTML == null) {
        return listImgUrl;
    }
    Matcher matcher = Pattern.compile(IMGURL_REG).matcher(HTML);
    while (matcher.find()) {
        listImgUrl.add(matcher.group());
    }
    return listImgUrl;
}

/**
 * Fetches the content behind a URL and decodes it with {@code ECODING}.
 *
 * @param url address to fetch
 * @return the decoded response body
 * @throws Exception if the URL is malformed or the content cannot be read
 */
private String getHTML(String url) throws Exception {
    URL uri = new URL(url);
    URLConnection connection = uri.openConnection();
    InputStream in = connection.getInputStream();
    try {
        // Collect the raw bytes first and decode once at the end: decoding each
        // chunk separately would corrupt multi-byte characters that straddle a
        // chunk boundary, and only the bytes actually read may be used.
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        byte[] buf = new byte[1024];
        int length;
        while ((length = in.read(buf, 0, buf.length)) != -1) {
            collected.write(buf, 0, length);
        }
        return collected.toString(ECODING);
    } finally {
        in.close();
    }
}

/**
 * Extracts the http/https URLs contained in the given tag strings.
 *
 * @param listImageUrl tag strings, typically the result of {@code getImageUrl}; may be {@code null}
 * @return the URLs found, in order of appearance; never {@code null}
 */
private List<String> getImageSrc(List<String> listImageUrl) {
    List<String> listImgSrc = new ArrayList<String>();
    if (listImageUrl == null) {
        return listImgSrc;
    }
    Pattern srcPattern = Pattern.compile(IMGSRC_REG);
    for (String image : listImageUrl) {
        Matcher matcher = srcPattern.matcher(image);
        while (matcher.find()) {
            String match = matcher.group();
            // The match ends with its delimiter (quote, '>' or whitespace). Drop the
            // last character, then trim in case the delimiter was several whitespace
            // characters.
            listImgSrc.add(match.substring(0, match.length() - 1).trim());
        }
    }
    return listImgSrc;
}

/**
 * Downloads each URL into the given directory, using the text after the last
 * {@code '/'} of the URL as the file name.
 *
 * <p>A failure on one URL is reported and does not stop the remaining downloads.
 *
 * @param listImgSrc URLs to download; entries not starting with "http" are skipped
 * @param path       target directory
 */
private void Download(List<String> listImgSrc, String path) {
    if (listImgSrc == null) {
        return;
    }
    for (String url : listImgSrc) {
        if (url == null || !url.startsWith("http")) {
            continue;
        }
        String imageName = url.substring(url.lastIndexOf("/") + 1);
        if (imageName.length() == 0) {
            // URL ends with '/': there is no file name to save under.
            continue;
        }
        try {
            System.out.println("开始下载:" + url);
            InputStream in = new URL(url).openStream();
            try {
                FileOutputStream fo = new FileOutputStream(new File(path, imageName));
                try {
                    byte[] buf = new byte[1024];
                    int length;
                    while ((length = in.read(buf, 0, buf.length)) != -1) {
                        fo.write(buf, 0, length);
                    }
                } finally {
                    fo.close();
                }
            } finally {
                in.close();
            }
            System.out.println(path + imageName + "下载完成");
        } catch (Exception e) {
            // Best effort: report which image failed and why, then carry on with the rest.
            System.out.println("下载失败:" + url + " (" + e + ")");
        }
    }
}
替换img标签
/** * 替换指定标签的属性和值 * @param str 需要处理的字符串 * @param tag 标签名称 * @param tagAttrib 要替换的标签属性值 * @param startTag 新标签开始标记 * @param endTag 新标签结束标记 * @return * @author huweijun * @date 2016年7月13日 下午7:15:32 */ public static String replaceHtmlTag(String str, String tag, String tagAttrib, String startTag, String endTag) { //匹配以<img开头>结尾 String regxpForTag = "<\\s*" + tag + "\\s+([^>]*)\\s*" ; //匹配src="开头,"结尾 String regxpForTagAttrib = tagAttrib + "=\\s*\"([^\"]+)\"" ; //编译后的正则表达式 Pattern patternForTag = Pattern.compile (regxpForTag,Pattern. CASE_INSENSITIVE ); Pattern patternForAttrib = Pattern.compile (regxpForTagAttrib,Pattern. CASE_INSENSITIVE ); //先查<img标签 Matcher matcherForTag = patternForTag.matcher(str); StringBuffer sb = new StringBuffer(); //如果有结果 boolean result = matcherForTag.find(); while (result) { StringBuffer sbreplace = new StringBuffer( "<"+tag+" "); //查到的第一个<img,再匹配src Matcher matcherForAttrib = patternForAttrib.matcher(matcherForTag.group(1)); if (matcherForAttrib.find()) { String attributeStr = matcherForAttrib.group(1); String imageName = attributeStr.substring(attributeStr.lastIndexOf("/") + 1, attributeStr.length()); sbreplace.append("id='"+imageName+"' alt='图片' onerror=\"NextPic('/attached/image/"+imageName+"','"+imageName+"');\""); matcherForAttrib.appendReplacement(sbreplace, startTag + "rcwimg" + endTag); } matcherForAttrib.appendTail(sbreplace); matcherForTag.appendReplacement(sb, sbreplace.toString()); result = matcherForTag.find(); } matcherForTag.appendTail(sb); return sb.toString(); }