假设下面是你的视频网站的链接列表。别人想爬取你的数据十分轻松:只看链接规则就知道数据库主键是自增序列。
http://www.xxxx.com/video/1
http://www.xxxx.com/video/2
http://www.xxxx.com/video/3
要解决这一问题,我们可以使用短地址,不对外暴露真实链接(自增 ID)。使用可逆的 ID 混淆编码(思路类似对称加密,但并不是真正的加密,不应用来保护敏感数据)是一个很好的方案。
Hashids是一个很好的选择,它提供了JS/PHP/JAVA/PYTHON等编程语言的实现,这里我使用的就是它。
下面是我基于 Blade 框架搭建的 Java 短地址服务,首先是数据表结构:
-- Storage for shortened links: one row per submitted long URL.
-- The route code encodes a row's `id` into the short key and decodes the key
-- back to the `id` to look the row up again.
CREATE TABLE `t_url` (
-- Auto-incrementing primary key, assigned by the database on insert.
`id` int(10) NOT NULL AUTO_INCREMENT,
-- The original (long) URL that the short link redirects to.
`url` text NOT NULL,
PRIMARY KEY (`id`)
-- AUTO_INCREMENT=15 makes the next generated id 15.
) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=utf8;
路由
/**
 * Routes of the short-URL service.
 *
 * <ul>
 *   <li>{@code GET /:key} decodes the short key to a row id and redirects to the stored URL.</li>
 *   <li>{@code GET /} returns {@code "index"}.</li>
 *   <li>{@code POST /} validates and stores a long URL, then exposes its short key
 *       as the {@code url_hash} request attribute.</li>
 * </ul>
 */
@Path
public class IndexRoute {

    /** Hashids codec shared by all requests; the constructor argument is the salt. */
    private static final Hashids HASHIDS = new Hashids("blade-shorturl");

    /** Expression used to recognise http(s)/ftp/file URLs in submitted input. */
    private static final String REGEX = "\\b(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]";

    /** {@link #REGEX} compiled once instead of on every validation call. */
    private static final Pattern URL_PATTERN = Pattern.compile(REGEX);

    /** Body sent when a short key cannot be resolved to a stored URL. */
    private static final String NOT_FOUND = "没有找到";

    private final UrlModel urlModel = new UrlModel();

    /**
     * Resolves a short key and redirects to the URL stored for it.
     *
     * <p>Responds with a plain-text "not found" message when the key does not decode,
     * decodes to a value outside the {@code int} range, or no URL is stored for the id.
     *
     * @param req      current request; the short key is read from the {@code :key} path parameter
     * @param response response used for the redirect or the "not found" text
     */
    @Route("/:key")
    public void get(Request req, Response response) {
        // Strip everything that is not an ASCII letter or digit before decoding.
        String key = req.pathParam(":key").replaceAll("[^A-Za-z0-9]", "");
        long[] numbers = HASHIDS.decode(key);
        if (null == numbers || numbers.length < 1) {
            response.text(NOT_FOUND);
            return;
        }

        // The lookup helper takes an int; reject values that would be silently
        // truncated by the cast and could otherwise alias a different row.
        long decoded = numbers[0];
        if (decoded != (int) decoded) {
            response.text(NOT_FOUND);
            return;
        }

        // Guard against a missing row before dereferencing it.
        UrlModel record = get((int) decoded);
        String result = (record == null) ? null : record.getUrl();
        if (result == null) {
            response.text(NOT_FOUND);
            return;
        }
        response.redirect(result);
    }

    /**
     * Handles {@code GET /}.
     *
     * @return the string {@code "index"} (presumably the view name — confirm against the framework)
     */
    @Route(value = "/", method = HttpMethod.GET)
    public String index() {
        return "index";
    }

    /**
     * Handles {@code POST /}: validates the submitted {@code url} parameter, stores it and
     * sets the {@code url_hash} request attribute to the encoded id. On failure the
     * {@code error} request attribute is set instead.
     *
     * @param request  current request; supplies the {@code url} parameter and receives the attributes
     * @param response current response (unused)
     * @return the string {@code "index"} in every case
     */
    @Route(value = "/", method = HttpMethod.POST)
    public String save(Request request, Response response) {
        String resJsp = "index";
        String longUrl = request.query("url");
        if (!isURL(longUrl)) {
            request.attribute("error", "无效的URL");
            return resJsp;
        }

        Integer id = this.save(longUrl);
        // A null result must not be unboxed; treat it like 0, i.e. as a failed insert.
        if (id == null || id == 0) {
            request.attribute("error", "保存失败");
            return resJsp;
        }

        String hash = HASHIDS.encode(id);
        request.attribute("url_hash", hash);
        System.out.println("id = " + id + ",url_hash=" + hash);
        return resJsp;
    }

    /**
     * Inserts a URL row.
     *
     * @param url the long URL to store
     * @return whatever the model's {@code executeAndCommit()} returns; the caller treats it
     *         as the new row id, with {@code 0} meaning failure
     */
    private Integer save(String url) {
        return urlModel.insert().param("url", url).executeAndCommit();
    }

    /**
     * Looks a URL row up by primary key.
     *
     * @param id primary key of the row
     * @return the result of {@code fetchByPk}; callers must be prepared for {@code null}
     */
    private UrlModel get(int id) {
        return urlModel.fetchByPk(id);
    }

    /**
     * Tells whether the input is non-blank and contains a match for {@link #REGEX}.
     *
     * <p>NOTE(review): this uses {@code find()}, so any text that merely contains a URL is
     * accepted and stored verbatim; consider a full match if only bare URLs should pass.
     *
     * @param url candidate input, may be {@code null}
     * @return {@code true} when a URL is found in the input
     */
    private boolean isURL(String url) {
        return StringKit.isNotBlank(url) && URL_PATTERN.matcher(url).find();
    }
}
实现效果: