方式一:
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Utilities for extracting and comparing the registrable ("top") domain name of a URL.
 *
 * <p>The lookup is driven by a small hard-coded list of public suffixes, not by the complete
 * Public Suffix List, so only hosts that end in one of the listed suffixes are reduced.
 */
public class URLUtil {

    /** Known public suffixes, lower case and without a leading dot. */
    private static final Set<String> PUBLIC_SUFFIXES = new HashSet<String>(Arrays.asList(
            "com", "org", "net", "gov", "edu", "co", "tv", "mobi", "info", "asia", "xxx", "onion",
            "cn", "com.cn", "edu.cn", "gov.cn", "net.cn", "org.cn",
            "jp", "kr", "tw",
            "hk", "com.hk", "org.hk",
            "se", "com.se", "org.se"));

    /** Matches a dotted-quad IPv4 literal (the octet range is not validated). */
    private static final Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");

    /**
     * Returns the registrable domain name of the given URL, i.e. the known public suffix plus
     * the one label in front of it (for example {@code hexun.com} for {@code news.hexun.com}).
     *
     * <p>The host is lower-cased and a single trailing root dot is removed before the lookup.
     * IPv4 literals are returned unchanged. If the host does not end in any known public
     * suffix, the whole (normalized) host is returned, so that unrelated hosts under an unknown
     * suffix are never reported as the same domain.
     *
     * @param url the URL to inspect
     * @return the registrable domain name in lower case, or the normalized host when it cannot
     *         be reduced
     */
    public static String getDomainName(URL url) {
        // Host names are case-insensitive, while the suffix set only contains lower-case entries.
        String host = url.getHost().toLowerCase(java.util.Locale.ROOT);
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }
        if (IP_PATTERN.matcher(host).matches()) {
            return host;
        }

        // Drop leading labels one at a time until what follows the first dot is a known suffix.
        String candidate = host;
        int dot = candidate.indexOf('.');
        while (dot >= 0) {
            String remainder = candidate.substring(dot + 1);
            if (PUBLIC_SUFFIXES.contains(remainder)) {
                return candidate;
            }
            candidate = remainder;
            dot = candidate.indexOf('.');
        }
        // No known suffix: returning only the last label would make unrelated hosts look equal.
        return host;
    }

    /**
     * Returns the registrable domain name of the given URL string.
     *
     * @param url the URL to parse and inspect
     * @return the registrable domain name, see {@link #getDomainName(URL)}
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public static String getDomainName(String url) throws MalformedURLException {
        return getDomainName(new URL(url));
    }

    /**
     * Tells whether two URLs share the same registrable domain name.
     *
     * @param url1 the first URL
     * @param url2 the second URL
     * @return {@code true} if both URLs resolve to the same domain name, ignoring case
     */
    public static boolean isSameDomainName(URL url1, URL url2) {
        return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
    }

    /**
     * Tells whether two URL strings share the same registrable domain name.
     *
     * @param url1 the first URL
     * @param url2 the second URL
     * @return {@code true} if both URLs resolve to the same domain name, ignoring case
     * @throws MalformedURLException if either argument cannot be parsed as a URL
     */
    public static boolean isSameDomainName(String url1, String url2)
            throws MalformedURLException {
        return isSameDomainName(new URL(url1), new URL(url2));
    }

    /** Small demo: prints the domain name resolved through both overloads. */
    public static void main(String[] args) throws Exception {
        String urlStr = "http://news.hexun.com/2017-09-23/190978248.html";
        System.out.println(getDomainName(urlStr));
        System.out.println(getDomainName(new URL(urlStr)));
    }
}
方式二:
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based helper that extracts the registrable ("top") domain from a URL using a fixed
 * list of recognized suffixes.
 */
public class DomainUtils {

    /**
     * One host label followed by a recognized suffix, anchored at the end of the host.
     *
     * <p>The anchor matters: without it a suffix alternative could match the beginning of a
     * longer label (for example {@code .com} inside {@code .community}). The last three
     * alternatives are internationalized suffixes (company, China, network) written as Unicode
     * escapes so the source does not depend on the file encoding.
     */
    private static final Pattern TOP_DOMAIN_PATTERN = Pattern.compile(
            "[^.]+(\\.com\\.cn|\\.net\\.cn|\\.org\\.cn|\\.gov\\.cn|\\.com|\\.net|\\.cn|\\.org"
                    + "|\\.cc|\\.me|\\.tel|\\.mobi|\\.asia|\\.biz|\\.info|\\.name|\\.tv|\\.hk"
                    + "|\\.\u516c\u53f8|\\.\u4e2d\u56fd|\\.\u7f51\u7edc)$");

    /**
     * Returns the registrable domain of the given URL, e.g. {@code hexun.com} for
     * {@code http://news.hexun.com/2017-09-23/190978248.html}.
     *
     * @param url the URL to inspect
     * @return the lower-cased domain (one label plus a recognized suffix), or {@code null} if
     *         the URL is malformed or its host does not end in a recognized suffix
     */
    public static String getTopDomain(String url) {
        final String host;
        try {
            // Host names are case-insensitive; normalize independently of the default locale.
            host = new URL(url).getHost().toLowerCase(java.util.Locale.ROOT);
        } catch (MalformedURLException ignored) {
            // A malformed URL has no domain; null is this method's "not found" result.
            return null;
        }
        // A fully-qualified host may carry a trailing root dot, which would defeat the end anchor.
        String normalized = host.endsWith(".") ? host.substring(0, host.length() - 1) : host;
        Matcher matcher = TOP_DOMAIN_PATTERN.matcher(normalized);
        return matcher.find() ? matcher.group() : null;
    }

    /** Small demo: prints {@code hexun.com}. */
    public static void main(String[] args) {
        System.out.println(getTopDomain("http://news.hexun.com/2017-09-23/190978248.html"));
    }
}