首页 技术 正文
技术 2022年11月6日
0 收藏 622 点赞 685 浏览 13284 个字

使用Spring Data ElasticSearch+Jsoup操作集群数据存储

1、使用Jsoup爬取京东商城的商品数据

1)获取商品名称、价格以及商品地址,并封装为一个Product对象,代码截图:

2)创建Product实体类,完成对索引、类型、映射以及文档的配置,代码截图:

3)将爬取到的商品对象存储到集群中,代码截图:

4)完成对商品信息的查询、分页、删除和更新操作,代码截图:

applicationContext.xml

 1 <?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
           http://www.springframework.org/schema/beans/spring-beans.xsd
           http://www.springframework.org/schema/context
           http://www.springframework.org/schema/context/spring-context.xsd
           http://www.springframework.org/schema/data/elasticsearch
           http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd">

    <!-- Enable component scanning for the application's base package -->
    <context:component-scan base-package="com.elasticsearch"/>

    <!-- Cluster connection settings.
         The node list is kept on a single line: a line break inside the attribute value
         would be normalised to whitespace and end up inside the second host name. -->
    <elasticsearch:transport-client id="esClient" cluster-name="my-cluster"
                                    cluster-nodes="127.0.0.1:9301,127.0.0.1:9302,127.0.0.1:9303"/>

    <!-- ElasticsearchTemplate built on top of the transport client declared above -->
    <bean id="elasticsearchTemplate" class="org.springframework.data.elasticsearch.core.ElasticsearchTemplate">
        <constructor-arg name="client" ref="esClient"/>
    </bean>

    <!-- Scan the repository interfaces (they play the role MyBatis mappers play for a database);
         each XxxMapper in this package works on the entity Xxx, which is a type ("table") in Elasticsearch -->
    <elasticsearch:repositories base-package="com.elasticsearch.mapper"/>
</beans>

pom.xml

  1 <?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.elasticsearch</groupId>
  <artifactId>eshm0430</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>eshm0430</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- The sources use java.util.Optional, so Java 8 is the minimum language level.
         These properties are the single place where the level is configured. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- No exclusions here on purpose: the Netty transport module that comes in transitively
         is not declared anywhere else in this POM. -->
    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-elasticsearch</artifactId>
      <version>3.1.9.RELEASE</version>
    </dependency>

    <!-- Each test library is declared exactly once; duplicate declarations make Maven warn
         and silently keep only the last one.
         NOTE(review): both can be narrowed to <scope>test</scope> if every test class
         lives under src/test/java - confirm before changing. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>5.2.5.RELEASE</version>
      <scope>compile</scope>
    </dependency>

    <!-- jsoup: HTML fetching and parsing for the product scraper -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.11.3</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>

Product

 1 package com.elasticsearch.entity;
2 import org.springframework.data.annotation.Id;
3 import org.springframework.data.elasticsearch.annotations.Document;
4 import org.springframework.data.elasticsearch.annotations.Field;
5 import org.springframework.data.elasticsearch.annotations.FieldType;
6
7 @Document(indexName = "my-index3", type = "Product")
8 public class Product {
9 @Id
10
11 @Field(type = FieldType.Long,index = false,store = true)
12 private Long id;
13
14 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
15 private String pname;
16
17 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
18 private String pprice;
19
20 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
21 private String padress;
22
23 public Long getId() {
24 return id;
25 }
26
27 public void setId(Long id) {
28 this.id = id;
29 }
30
31 public String getPname() {
32 return pname;
33 }
34
35 public void setPname(String pname) {
36 this.pname = pname;
37 }
38
39 public String getPprice() {
40 return pprice;
41 }
42
43 public void setPprice(String pprice) {
44 this.pprice = pprice;
45 }
46
47 public String getPadress() {
48 return padress;
49 }
50
51 public void setPadress(String padress) {
52 this.padress = padress;
53 }
54
55 @Override
56 public String toString() {
57 return "Product{" +
58 "id=" + id +
59 ", pname='" + pname + '\'' +
60 ", pprice='" + pprice + '\'' +
61 ", padress='" + padress + '\'' +
62 '}';
63 }
64 }

ProductMapper

 1 package com.elasticsearch.mapper;
2
3 import com.elasticsearch.entity.Product;
4 import org.springframework.data.domain.Pageable;
5 import org.springframework.data.elasticsearch.repository.ElasticsearchCrudRepository;
6 import org.springframework.stereotype.Repository;
7
8 import java.util.List;
9
10 @Repository
11 public interface ProductMapper extends ElasticsearchCrudRepository <Product,Long> {
12
13
14 // 根据标题查询并分页
15 List<Product> findByPname(String pname , Pageable pageable);
16
17 }

ProductService

 1 package com.elasticsearch.service;
2
3 import com.elasticsearch.entity.Product;
4 import org.springframework.data.domain.Pageable;
5
6 import java.util.List;
7 import java.util.Optional;
8
9
10 public interface ProductService {
11
12 // 新增文档的方法
13 void save(Product product);
14
15 // 根据文档查询商品信息
16 Optional<Product> findById(Long id);
17
18 // 根据id删除
19 void deleteById(Long id);
20
21 // 根据id更新文件
22 void updateById(Product product);
23
24 // 根据标题查询并分页
25 List<Product> findByPname(String pname ,Pageable pageable);
26
27
28 }

ProductServiceImp

 1 package com.elasticsearch.service.Imp;
2
3 import com.elasticsearch.entity.Product;
4 import com.elasticsearch.mapper.ProductMapper;
5 import com.elasticsearch.service.ProductService;
6 import org.springframework.beans.factory.annotation.Autowired;
7 import org.springframework.data.domain.Pageable;
8 import org.springframework.stereotype.Service;
9
10 import java.util.List;
11 import java.util.Optional;
12
13 @Service("ProductService")
14 public class ProductServiceImp implements ProductService {
15 @Autowired
16 private ProductMapper productMapper;
17
18 @Override
19 public void save(Product product) {
20 productMapper.save(product);
21 }
22
23 @Override
24 public Optional<Product> findById(Long id) {
25 return productMapper.findById(id);
26 }
27
28 @Override
29 public void deleteById(Long id) {
30 productMapper.deleteById(id);
31 }
32
33 @Override
34 public void updateById(Product product) {
35 productMapper.save(product);
36 }
37
38 @Override
39 public List<Product> findByPname(String pname, Pageable pageable) {
40 return productMapper.findByPname(pname,pageable);
41 }
42 }

SpringDataESTest

  1 package com.elasticsearch;
2 import com.elasticsearch.entity.Product;
3 import com.elasticsearch.service.ProductService;
4 import org.jsoup.Jsoup;
5 import org.jsoup.nodes.Document;
6 import org.jsoup.nodes.Element;
7 import org.jsoup.select.Elements;
8 import org.junit.Test;
9 import org.junit.runner.RunWith;
10 import org.springframework.beans.factory.annotation.Autowired;
11 import org.springframework.data.domain.PageRequest;
12 import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
13 import org.springframework.test.context.ContextConfiguration;
14 import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
15
16 import java.io.IOException;
17 import java.util.List;
18 import java.util.Optional;
19
20 @RunWith(SpringJUnit4ClassRunner.class)
21 @ContextConfiguration(locations = "classpath:applicationContext.xml")
22 public class SpringDataESTest {
23
24 @Autowired
25 private ElasticsearchTemplate elasticsearchTemplate;
26
27 @Autowired
28 private ProductService productService;
29
30 @Test //import org.junit.Test; 不要自己创建一个名称为Test类
31 public void createIndex() {
32 //创建空的索引库
33 elasticsearchTemplate.createIndex(Product.class);
34 //添加映射
35 elasticsearchTemplate.putMapping(Product.class);
36 }
37
38 // 创建
39 @Test
40 public void createDocument(){
41 Document doc = null;
42 String url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&psort=3&click=0";
43 // String url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=4cbce742a5634b66996fa09045840c0e";
44 try {
45 doc = Jsoup.connect(url).get();
46 //Element:页面中的所有 ul > li , li特点是 class = gl-item,使用类选择器
47 Elements liLists = doc.select(".gl-item");
48 long i=0;
49 for (Element li : liLists) {
50 //分析 li 结构
51 //1)获取图片地址 class= p-img ,查找img标签,获取 img 的src 属性的值
52 //String pimgsrc = li.select(".p-img").select("img").attr("src");
53 //System.out.println(pimgsrc);
54
55 //2)获取商品价格: class = p-price ,查找 i 标签,获取 i 标签包含的内容 <i>12324</i>
56 String pprice = li.select(".p-price").select("i").text();
57 System.out.println(pprice);
58
59 //3)获取商品名称: class= p-name p-name-type-2,查找 em 标签,获取 em 标签的内容
60 String pname = li.select(".p-name").select(".p-name-type-2").select("em").text();
61 String pname2 = li.select("div[class='p-name p-name-type-2']").select("em").text();
62
63 System.out.println(pname);
64 System.out.println(pname2);
65
66 //4)获取商品地址
67 String padress = li.select(".p-img").select("a").attr("href");
68 System.out.println(padress);
69 i++;
70 Product product = new Product();
71 product.setId(i);
72 product.setPname(pname);
73 product.setPprice(pprice);
74 product.setPadress(padress);
75
76 productService.save(product);
77 }
78 } catch (IOException e) {
79 e.printStackTrace();
80 }
81 }
82
83 @Test
84 public void getDocumentById(){
85 Optional<Product> byId = productService.findById(1L);
86 Product product = byId.get();
87 System.out.println("根据id查询"+product);
88
89 }
90
91 // 根据id删除文件
92 @Test
93 public void deleteDocumentById(){
94 productService.deleteById(30L);
95
96 }
97
98 @Test
99 // 根据id更新文件
100 public void updateDocumentById(){
101 Product product = new Product();
102 product.setId(29L);
103 product.setPprice("2");
104 product.setPname("根据id更新的名字");
105 product.setPadress("更新的");
106 productService.updateById(product);
107 System.out.println("更新后的文件"+product);
108 }
109
110 // 根据title查询 并且分页
111 @Test
112 public void getDocumentByPnameAndPage(){
113 List<Product> byPnameAndPage = productService.findByPname("华为", PageRequest.of(0, 10));
114 System.out.println(byPnameAndPage);
115 }
116
117
118
119
120 // 创建
121 // @Test
122 // public void createDocument(){
123 // for (Long i = 1L;i <= 10L; i++){
124 // // 批量创建Hello对象
125 // Hello hello = new Hello();
126 // hello.setId(i);
127 // hello.setTitle("新增的title"+i);
128 // hello.setContent("新增的content"+i);
129 // helloService.save(hello);
130 // }
131 //
132 // }
133 //
134 // // 根据id查询
135 // @Test
136 // public void getDocumentById(){
137 // Optional<Hello> helloOptional = helloService.findById(1L);
138 // Hello hello = helloOptional.get();
139 // System.out.println("根据id查询hello:"+hello);
140 // }
141 //
142 // // 查询所有hello
143 // @Test
144 // public void getAllDocument(){
145 // Iterable<Hello> all = helloService.findAll();
146 //
147 // //方法一
148 //// Iterator<Hello> iterator = all.iterator();// 10个
149 //// while (iterator.next() != null){
150 //// Hello hello = iterator.next();
151 //// System.out.println("查询所有hello"+hello);
152 //// }
153 //
154 // // 方法二
155 // // forEach(Consumer),Consumer接口通过@FunctionallInterface修饰
156 // // 表示他是一个函数式
157 // // 如果一个方法是形参是函数接口,传递形参时可以使用Lambda表达式,特点是使用箭头符号
158 // // void accept(T t)
159 // all.forEach(item-> System.out.println("查询所有hello"+item));
160 //
161 //
162 // }
163 //
164 // // 根据id更新
165 // @Test
166 // public void updateDocumentById(){
167 // Hello hello = new Hello();
168 // hello.setId(1L);
169 // hello.setTitle("更新修改的title");
170 // hello.setContent("更新修改的Content");
171 // helloService.save(hello);
172 // System.out.println("更新后的为"+hello);
173 // }
174 //
175 // // 根据id删除文档
176 // @Test
177 // public void deleteDocumentById(){
178 // helloService.deleteById(10L);
179 // }
180 //
181 // // 删除所有文档
182 //// @Test
183 //// public void deleteAllDocument(){
184 //// helloService.deleteAll();
185 //// }
186 //
187 // // 根据title查询
188 // @Test
189 // public void getDocumentByTitle(){
190 // List<Hello> hs = helloService.findByTitle("新增");
191 // System.out.println(hs);
192 // }
193 //
194 // @Test
195 // public void getDocumentByTitleAndPage(){
196 // List<Hello> hs = helloService.findByTitle("新增");
197 // System.out.println(hs);
198 //
199 // List<Hello> hs1 = helloService.findByTitle("新增", PageRequest.of(1,3));
200 // System.out.println("---------"+hs1);
201 // }
202
203
204
205 }

1

//        System.out.println(hs);
相关推荐
python开发_常用的python模块及安装方法
adodb:我们领导推荐的数据库连接组件bsddb3:BerkeleyDB的连接组件Cheetah-1.0:我比较喜欢这个版本的cheeta…
日期:2022-11-24 点赞:878 阅读:9,487
Educational Codeforces Round 11 C. Hard Process 二分
C. Hard Process题目连接:http://www.codeforces.com/contest/660/problem/CDes…
日期:2022-11-24 点赞:807 阅读:5,903
下载Ubuntn 17.04 内核源代码
zengkefu@server1:/usr/src$ uname -aLinux server1 4.10.0-19-generic #21…
日期:2022-11-24 点赞:569 阅读:6,736
可用Active Desktop Calendar V7.86 注册码序列号
可用Active Desktop Calendar V7.86 注册码序列号Name: www.greendown.cn Code: &nb…
日期:2022-11-24 点赞:733 阅读:6,487
Android调用系统相机、自定义相机、处理大图片
Android调用系统相机和自定义相机实例本博文主要是介绍了android上使用相机进行拍照并显示的两种方式,并且由于涉及到要把拍到的照片显…
日期:2022-11-24 点赞:512 阅读:8,127
Struts的使用
一、Struts2的获取  Struts的官方网站为:http://struts.apache.org/  下载完Struts2的jar包,…
日期:2022-11-24 点赞:671 阅读:5,289