前言
本文不是爬虫项目,只是日常工作中遇到的一个小需求。其实数据的收集并不难,难的是数据的后期清洗和补充。
本文主要是调取12306和高德地图api和百度地图api获取结果后进行解析,直接看下边的代码
1、获取现有火车站的基本信息
/**
 * Downloads the 12306 station dictionary and stores every station name.
 *
 * <p>The response text is split on {@code @} into records and each record is split on
 * {@code |}. For every record with more than two fields, the second field is passed to
 * {@code updateMapper.InsertName}. Shorter records are skipped. Any exception raised while
 * downloading, parsing or storing is printed and ends the import.
 */
public void b() {
    String url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js";
    try {
        String response = HttpClientUtil.doGet(url, null);
        if (response == null || response.isEmpty()) {
            // Nothing was downloaded, so there is nothing to parse.
            return;
        }
        for (String record : response.split("@")) {
            String[] fields = record.split("\\|");
            if (fields.length > 2) {
                // Only the station name is needed here. To keep the other fields as well,
                // fill a TrainInfo instead (setBy1(fields[0]), setTname(fields[1]),
                // setBy2(fields[2]), setBy3(fields[3]), setBy4(fields[4]), setXh(fields[5]))
                // and call updateMapper.insertTrainName(trainInfo).
                updateMapper.InsertName(fields[1]);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
因为这里我只需要火车站的名字,所以只解析了名称这一个字段;如果你需要别的字段,也可以自己解析。直接在浏览器中打开下面的链接就可以看到返回结果:
https://kyfw.12306.cn/otn/resources/js/framework/station_name.js
2、根据名字的不同调用百度地图接口api
首先登录百度地图api网站,登录后进入控制台,选择"我的应用",创建web服务应用即可。下面代码中的ak,就是你访问接口的钥匙。
这里我们可以看下百度地图的api文档
不理解的可以先看看文档,下面直接代码了,代码中有大量的try...catch保证程序的运行(有许多字段为空。数据残缺),因为只是为了更新字典表,所以本文代码格式有点乱【汗颜】,代码最后的sleep是为了减少并发,否则百度会给你报警,除非你是RMB玩家。代码粘贴复制,记住添加你的ak。
@Component
public class LocationServer {
@Autowired
UpdateMapper updateMapper;
public void a(){
//读取城市名 上一步的我们直接存入数据库中,打上标志,代表全文件
List<String> stringList = updateMapper.selectTrainCity();
for (String s : stringList) {
String url = "http://api.map.baidu.com/place/v2/search?query=火车站®ion="+s+"&extensions_adcode=true&output=json&ak=你的ak";
String connection = null;
try {
connection = HttpClientUtil.doGet(url, null);
// System.out.println(connection);
} catch (Exception e) {
e.printStackTrace();
}
//解析json
JSONObject jsonObject = null;
try {
jsonObject = new JSONObject(connection);
if (jsonObject == null){
continue;
}
} catch (JSONException e) {
e.printStackTrace();
}
JSONArray jsonArray = null;
try {
jsonArray = jsonObject.getJSONArray("results");
} catch (JSONException e) {
e.printStackTrace();
}
if (jsonArray ==null){
continue;
}
//循环数组
for (int i = 0; i < jsonArray.length(); i++) {
JSONObject jsonObject1 = null;
try {
if (jsonArray == null) {
continue;
}
jsonObject1 = jsonArray.getJSONObject(i);
} catch (JSONException e) {
e.printStackTrace();
}
try {
String name = null;
try {
name = jsonObject1.getString("name");
} catch (JSONException e) {
e.printStackTrace();
}
String lat = null;
try {
lat = jsonObject1.getJSONObject("location").getString("lat");
} catch (JSONException e) {
e.printStackTrace();
}
String lng = null;
try {
lng = jsonObject1.getJSONObject("location").getString("lng");
} catch (JSONException e) {
e.printStackTrace();
}
String address = null;
try {
address = jsonObject1.getString("address");
} catch (JSONException e) {
e.printStackTrace();
}
String province = null;
try {
province = jsonObject1.getString("province");
} catch (JSONException e) {
e.printStackTrace();
}
String city = null;
try {
city = jsonObject1.getString("city");
} catch (JSONException e) {
e.printStackTrace();
}
String area = null;
try {
area = jsonObject1.getString("area");
} catch (JSONException e) {
e.printStackTrace();
}
String street_id = null;
try {
street_id = jsonObject1.getString("street_id");
} catch (JSONException e) {
e.printStackTrace();
}
String detail = null;
try {
detail = jsonObject1.getString("detail");
} catch (JSONException e) {
e.printStackTrace();
}
String uid = null;
try {
uid = jsonObject1.getString("uid");
} catch (JSONException e) {
e.printStackTrace();
}
String adcode = null;
try {
adcode = jsonObject1.getString("adcode");
} catch (JSONException e) {
e.printStackTrace();
}
//封装实体
TrainDomain trainDomain = new TrainDomain(name, lat, lng, address, province, city, area, street_id, detail, uid, adcode);
updateMapper.insertTrainLocation(trainDomain);
System.out.println(trainDomain);
} catch (Exception e) {
e.printStackTrace();
}
}
try {
Thread.sleep(500);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
3、使用高德地图
建议优先使用高德地图,两个api服务互为补充,但是高德明显强大得多。前面的注册流程和百度地图类似,代码如下:
/**
 * Looks up railway stations through the Amap (Gaode) place-text API and stores the first
 * match of every search through {@code UpdateMapper}.
 *
 * <p>NOTE(review): the field is {@code @Autowired} but, unlike {@code LocationServer}, this
 * class carries no {@code @Component} annotation - confirm how it is registered.
 */
public class GdLocationServer {

    @Autowired
    UpdateMapper updateMapper;

    /**
     * Searches Amap for every station name returned by {@code updateMapper.selectNameByZt()}
     * and stores the first place of each result.
     *
     * <p>A station is skipped when the request yields no body, the body cannot be parsed, or
     * the response contains no place. A failure while handling one station is printed and
     * the next station is processed.
     */
    public void a() {
        // The original note describes these as the station names whose status is "o".
        List<String> stationNames = updateMapper.selectNameByZt();
        if (stationNames == null) {
            return;
        }
        for (String stationName : stationNames) {
            try {
                importStation(stationName);
            } catch (Exception e) {
                // Keep going: one failing station must not abort the whole run.
                e.printStackTrace();
            }
        }
    }

    /** Stores the first place Amap returns for one station name, if there is one. */
    private void importStation(String stationName) {
        String url = "https://restapi.amap.com/v3/place/text?key=你的key&keywords=" + stationName
                + "&types=火车站&city=&children=&offset=&page=&extensions=all";
        JSONObject place = fetchFirstPlace(url);
        if (place == null) {
            return;
        }

        // "location" is read as "<lng>,<lat>"; anything else leaves both coordinates unset.
        String lng = null;
        String lat = null;
        String location = readString(place, "location");
        if (location != null) {
            String[] coordinates = location.split(",");
            if (coordinates.length >= 2) {
                lng = coordinates[0];
                lat = coordinates[1];
            }
        }

        String province = readString(place, "pname");
        String cityName = readString(place, "cityname");
        String districtName = readString(place, "adname");

        TrainDomain trainDomain = new TrainDomain();
        trainDomain.setName(readString(place, "name"));
        trainDomain.setLat(lat);
        trainDomain.setLng(lng);
        trainDomain.setAdcode(readString(place, "adcode"));
        trainDomain.setProvince(province);
        trainDomain.setArea(districtName);
        trainDomain.setCity(cityName);
        trainDomain.setAddress(
                joinPresent(province, cityName, districtName, readString(place, "address")));
        updateMapper.insertTrainLocation(trainDomain);
        System.out.println(trainDomain);
    }

    /**
     * Requests {@code url} and returns the first element of the {@code pois} array, or
     * {@code null} when there is no body, the body cannot be parsed, or the array is empty.
     */
    private JSONObject fetchFirstPlace(String url) {
        try {
            String body = HttpClientUtil.doGet(url, null);
            if (body == null || body.isEmpty()) {
                return null;
            }
            return new JSONObject(body).getJSONArray("pois").optJSONObject(0);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the value stored under {@code key} when it is a JSON string, otherwise
     * {@code null}.
     *
     * <p>NOTE(review): Amap appears to send an empty array instead of a string for absent
     * values; such values are treated as missing - confirm against the API documentation.
     */
    private static String readString(JSONObject source, String key) {
        Object value = source.opt(key);
        return value instanceof String ? (String) value : null;
    }

    /** Concatenates the given parts in order, leaving out the ones that are {@code null}. */
    private static String joinPresent(String... parts) {
        StringBuilder joined = new StringBuilder();
        for (String part : parts) {
            if (part != null) {
                joined.append(part);
            }
        }
        return joined.toString();
    }
}
得到结果之后,你就可以根据自己的要求对数据进行痛苦的清洗和补充了。下面补充我写的http工具类,你也可以换成自己的。
/**
 * Small helper around Apache HttpClient for GET, form POST and JSON POST requests.
 *
 * <p>Every method returns the response body as text when the server answers with status
 * 200 and an empty string otherwise.
 */
public class HttpClientUtil {
    private static final Logger logger = LoggerFactory.getLogger(HttpClientUtil.class);

    /** Charset used for form encoding and as fallback when a response declares none. */
    private static final String DEFAULT_CHARSET = "utf-8";

    /**
     * Client shared by all requests. Building a new client per call leaves its connection
     * pool open, so one instance is created once and reused.
     */
    private static final HttpClient HTTP_CLIENT = HttpClientBuilder.create().build();

    /**
     * Sends a GET request.
     *
     * @param url request URL; it may already contain a query string
     * @param params additional query parameters appended to {@code url}; may be
     *     {@code null} or empty
     * @return the response body on status 200, otherwise an empty string (also when the
     *     request fails with an exception, which is logged)
     * @author Mundo
     */
    public static String doGet(String url, Map<String, String> params) {
        String result = "";
        HttpGet httpGet = null;
        try {
            // Parameters could also be concatenated onto the URL by hand; URIBuilder is
            // used here instead.
            URIBuilder uriBuilder = new URIBuilder(url);
            if (null != params && !params.isEmpty()) {
                for (Map.Entry<String, String> entry : params.entrySet()) {
                    // addParameter keeps an existing parameter of the same name, whereas
                    // setParameter would overwrite it.
                    uriBuilder.addParameter(entry.getKey(), entry.getValue());
                }
            }
            URI uri = uriBuilder.build();
            httpGet = new HttpGet(uri);
            logger.info("访问路径:{}", uri);
            HttpResponse response = HTTP_CLIENT.execute(httpGet);
            if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
                // The fallback charset only applies when the response does not declare one.
                result = EntityUtils.toString(response.getEntity(), DEFAULT_CHARSET);
                logger.info("请求成功!,返回数据:{}", result);
            } else {
                logger.info("请求失败!");
            }
        } catch (Exception e) {
            logger.error("请求失败!", e);
        } finally {
            // Release the connection
            if (null != httpGet) {
                httpGet.releaseConnection();
            }
        }
        return result;
    }

    /**
     * Sends a POST request with the parameters encoded as a form body.
     *
     * @param url request URL
     * @param params form fields; may be {@code null} or empty, in which case no body is sent
     * @return the response body on status 200, otherwise an empty string (also when the
     *     request fails with an exception, which is logged)
     * @author Mundo
     */
    public static String doPost(String url, Map<String, String> params) {
        String result = "";
        HttpPost httpPost = new HttpPost(url);
        try {
            if (null != params && !params.isEmpty()) {
                List<NameValuePair> pairs = new ArrayList<NameValuePair>();
                for (Map.Entry<String, String> entry : params.entrySet()) {
                    pairs.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
                }
                // Encode the form explicitly so that non-ASCII values are not garbled.
                httpPost.setEntity(new UrlEncodedFormEntity(pairs, DEFAULT_CHARSET));
            }
            HttpResponse response = HTTP_CLIENT.execute(httpPost);
            if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
                result = EntityUtils.toString(response.getEntity(), DEFAULT_CHARSET);
                logger.info("返回数据:>>>{}", result);
            } else {
                logger.info("请求失败!,url:{}", url);
            }
        } catch (Exception e) {
            logger.error("请求失败", e);
        } finally {
            // Release the connection
            httpPost.releaseConnection();
        }
        return result;
    }

    /**
     * Sends a POST request whose body is a JSON string.
     *
     * @param url request URL
     * @param params JSON text to send; no body is sent when it is empty
     * @return the response body on status 200, otherwise an empty string (also when the
     *     request fails with an {@code IOException}, which is logged)
     * @author Mundo
     */
    public static String sendJsonStr(String url, String params) {
        String result = "";
        HttpPost httpPost = new HttpPost(url);
        try {
            httpPost.addHeader("Content-type", "application/json; charset=utf-8");
            httpPost.setHeader("Accept", "application/json");
            if (!StringUtils.isEmpty(params)) {
                httpPost.setEntity(new StringEntity(params, Charset.forName("UTF-8")));
            }
            HttpResponse response = HTTP_CLIENT.execute(httpPost);
            if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
                result = EntityUtils.toString(response.getEntity(), DEFAULT_CHARSET);
                logger.info("返回数据:{}", result);
            } else {
                logger.info("请求失败");
            }
        } catch (IOException e) {
            logger.error("请求异常", e);
        } finally {
            // Release the connection, as the other request methods do.
            httpPost.releaseConnection();
        }
        return result;
    }
}