一、
使用技术:java、mysql
需要实现的功能:获取html页面中表格的数据,并且把它转存到mysql数据库中
实现思路:
1.通过java url处理中的url和urlconnection获取到html页面的信息
java url学习链接 :https://www.runoob.com/?s=java+url
2.通过BufferedReader(字符缓冲输入流),将html的信息存到一个变量中,方便后面通过正则表达式匹配所需要的值
3.通过使用java.util.regex包下面的Pattern 类和Matcher 类,我们可以得到html页面上我们需要的数据
java正则表达式链接:https://www.runoob.com/java/java-regular-expressions.html
4.将通过正则表达式匹配后获取的值,存入一个list集合中
5.通过循环将list集合中的内容分成相对应数据库每一列的值
6.通过jdbc连接mysql数据库中向里面存值
二、
代码片段:
测试类
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class test {
//连接数据库部分
Connection con;
PreparedStatement ps;
public void saveToMysql(testObject testObject){
try {
try {
Class.forName("com.mysql.jdbc.Driver");
} catch (Exception e) {
// TODO: handle exception
System.out.println("加载驱动失败");
}
try {
String url="jdbc:mysql://localhost:3306/数据库名?characterEncoding=utf-8";
String user="root";
String password="root";
con=DriverManager.getConnection(url, user, password);
} catch (Exception e) {
// TODO: handle exception
System.out.println("连接数据库失败");
}
String sql="insert into 表名(Lawyer_Name,Lawyer_Status,Bar_Number,City,Admission_Date) values(?,?,?,?,?)";
ps=con.prepareStatement(sql);
ps.setString(1, testObject.getLawyer_Name());
ps.setString(2, testObject.getStatus());
ps.setInt(3,testObject.getBar_Number());
ps.setString(4, testObject.getCity());
ps.setString(5,testObject.getAdmission_Date());
ps.execute();
if(con!=null){
con.close();
}
if(ps!=null){
ps.close();
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
public static void main(String []args) throws IOException, ParseException{
//实例化test类,以便调用其中的方法
test test=new test();
//URL(Uniform Resource Locator)统一资源定位符
//创建一个给定的url
URL url=new URL("html的链接地址");
//打开一个链接并运行访问资源
URLConnection connection=url.openConnection();
//缓冲输入流来读数据源
BufferedReader br=new BufferedReader(new InputStreamReader(connection.getInputStream()));
//while循环取数据
String str="";
StringBuffer sb=new StringBuffer();
while((str=br.readLine())!=null){
sb.append(str);
}
//关闭缓冲输入流
br.close();
//将table标签里面的内容通过正则表达式取出
String regex1="<td.*?>(.*?)</td>";
Pattern pattern=Pattern.compile(regex1);
String text=sb.toString();
Matcher matcher=pattern.matcher(text);
//声明一个list对象
List<String> list=new ArrayList<String>();
int count=0;
while(matcher.find()){
list.add(matcher.group(1));
}
testObject testObject=null;
//定义数组来存放从list集合中分装的数据
String[]na=new String[list.size()];
String[]st=new String[list.size()];
String[]b=new String[list.size()];
String[]c=new String[list.size()];
String[]a=new String[list.size()];
for(int i=0;i*5<list.size();i++){
na[i]=list.get(5*i);
}
System.out.println(na[0]);
for(int k=1;5*k-4<list.size();k++){
st[k]=list.get(5*k-4);
}
System.out.println(st[1]);
for(int m=2;m*5-8<list.size();m++){
b[m]=list.get(m*5-8);
}
System.out.println(b[2]);
for(int n=3;n*5-12<list.size();n++){
c[n]=list.get(n*5-12);
}
System.out.println(c[3]);
for(int o=4;o*5-16<list.size();o++){
a[o]=list.get(o*5-16).substring(list.get(o*5-16).lastIndexOf("")-4);
}
System.out.println(a[4]);
for(int i=0;i<list.size()/5;i++){
testObject=new testObject();
testObject.setLawyer_Name(na[i]);
testObject.setStatus(st[i+1]);
testObject.setBar_Number(Integer.parseInt(b[i+2]));
testObject.setCity(c[i+3]);
testObject.setAdmission_Date(a[i+4]);
//调用向mysql数据库存数据的方法
testObject.saveToMysql(testObject);
}
}
}
pojo类,用来存储数据库对应字段的值
public class testObject {
private String Lawyer_Name;
private String Status;
private int Bar_Number;
private String City;
private String Admission_Date;
public String getLawyer_Name() {
return Lawyer_Name;
}
public void setLawyer_Name(String lawyer_Name) {
Lawyer_Name = lawyer_Name;
}
public String getStatus() {
return Status;
}
public void setStatus(String status) {
Status = status;
}
public int getBar_Number() {
return Bar_Number;
}
public void setBar_Number(int bar_Number) {
Bar_Number = bar_Number;
}
public String getCity() {
return City;
}
public void setCity(String city) {
City = city;
}
public String getAdmission_Date() {
return Admission_Date;
}
public void setAdmission_Date(String admission_Date) {
Admission_Date = admission_Date;
}
}
mysql数据库表
CREATE TABLE `tb_bar` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`Lawyer_Name` varchar(500) NOT NULL COMMENT '律师名称',
`Lawyer_Status` varchar(20) NOT NULL COMMENT '状态',
`Bar_Number` int(11) NOT NULL COMMENT '工号',
`City` varchar(20) NOT NULL COMMENT '城市',
`Admission_Date` year(4) NOT NULL COMMENT '入职日期',
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=336594 DEFAULT CHARSET=utf8;
实现结果:
html的表格数据
最后插入数据库的数据