Elasticsearch(IK 分词器)实现实时从 MySQL 数据库中读取热词和停用词

package org.wltea.analyzer.dic;

import org.wltea.analyzer.help.ESPluginLoggerFactory;

public class HotDicReloadThread implements Runnable{

private static final org.apache.logging.log4j.Logger logger = ESPluginLoggerFactory.getLogger(Dictionary.class.getName());

@Override

public void run() {

while (true){

logger.info("-------重新加载mysql词典--------");

Dictionary.getSingleton().reLoadMainDict();

}

复制代码

修改org.wltea.analyzer.dic文件夹下的Dictionary

在Dictionary类中加载mysql驱动类

复制代码

/** Settings read from jdbc-reload.properties; shared by the MySQL loading methods. */
private static Properties prop = new Properties();

// Load the MySQL JDBC driver class once, when this class is initialized. A missing driver is
// only logged here.
static {
    try {
        Class.forName("com.mysql.jdbc.Driver");
    } catch (ClassNotFoundException e) {
        logger.error("error", e);
    }
}

复制代码

接着,创建从 MySQL 中加载词典的方法

复制代码

/**

* 从mysql中加载热更新词典

private void loadMySqlExtDict(){

Connection connection = null;

Statement statement = null;

ResultSet resultSet = null;

try {

Path file = PathUtils.get(getDictRoot(),"jdbc-reload.properties");

prop.load(new FileInputStream(file.toFile()));

logger.info("-------jdbc-reload.properties-------");

for (Object key : prop.keySet()) {

logger.info("key:{}", prop.getProperty(String.valueOf(key)));

}

logger.info("------- 查询词典, sql:{}-------", prop.getProperty("jdbc.reload.sql"));

// 建立mysql连接

connection = DriverManager.getConnection(

prop.getProperty("jdbc.url"),

prop.getProperty("jdbc.user"),

prop.getProperty("jdbc.password")

);

// 执行查询

statement = connection.createStatement();

resultSet = statement.executeQuery(prop.getProperty("jdbc.reload.sql"));

// 循环输出查询啊结果,添加到Main.dict中去

while (resultSet.next()) {

String theWord = resultSet.getString("word");

logger.info("------热更新词典:{}------", theWord);

// 加到mainDict里面

_MainDict.fillSegment(theWord.trim().toCharArray());

}

} catch (Exception e) {

logger.error("error:{}", e);

} finally {

try {

if (resultSet != null) {

resultSet.close();

}

if (statement != null) {

statement.close();

}

if (connection != null) {

connection.close();

}

} catch (SQLException e){

logger.error("error", e);

}

复制代码

接着,创建加载停用词词典方法

复制代码

/**

* 从mysql中加载停用词

private void loadMySqlStopwordDict(){

Connection conn = null;

Statement stmt = null;

ResultSet rs = null;

try {

Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");

prop.load(new FileInputStream(file.toFile()));

logger.info("-------jdbc-reload.properties-------");

for(Object key : prop.keySet()) {

logger.info("-------key:{}", prop.getProperty(String.valueOf(key)));

}

logger.info("-------查询停用词, sql:{}",prop.getProperty("jdbc.reload.stopword.sql"));

conn = DriverManager.getConnection(

prop.getProperty("jdbc.url"),

prop.getProperty("jdbc.user"),

prop.getProperty("jdbc.password"));

stmt = conn.createStatement();

rs = stmt.executeQuery(prop.getProperty("jdbc.reload.stopword.sql"));

while(rs.next()) {

String theWord = rs.getString("word");

logger.info("------- 加载停用词 : {}", theWord);

_StopWords.fillSegment(theWord.trim().toCharArray());

}

Thread.sleep(Integer.valueOf(String.valueOf(prop.get("jdbc.reload.interval"))));

} catch (Exception e) {

logger.error("error", e);

} finally {

try {

if(rs != null) {

rs.close();

}

if(stmt != null) {

stmt.close();

}

if(conn != null) {

conn.close();

}

} catch (SQLException e){

logger.error("error:{}", e);

}

复制代码

接下来,分别在 loadMainDict() 方法和 loadStopWordDict() 方法的结尾处调用上面新增的 loadMySqlExtDict() 和 loadMySqlStopwordDict() 方法

复制代码

/**

* 加载主词典及扩展词典

private void loadMainDict() {

// 建立一个主词典实例

_MainDict = new DictSegment((char) 0);

// 读取主词典文件

Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);

loadDictFile(_MainDict, file, false, "Main Dict");

// 加载扩展词典

this.loadExtDict();

// 加载远程自定义词库

this.loadRemoteExtDict();

// 加载Mysql外挂词库

this.loadMySqlExtDict();

}

复制代码

/**

* 加载用户扩展的停止词词典

private void loadStopWordDict() {

// 建立主词典实例

_StopWords = new DictSegment((char) 0);

// 读取主词典文件

Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);

loadDictFile(_StopWords, file, false, "Main Stopwords");

// 加载扩展停止词典

List<String> extStopWordDictFiles = getExtStopWordDictionarys();

if (extStopWordDictFiles != null) {

for (String extStopWordDictName : extStopWordDictFiles) {

logger.info("[Dict Loading] " + extStopWordDictName);

// 读取扩展词典文件

file = PathUtils.get(extStopWordDictName);

loadDictFile(_StopWords, file, false, "Extra Stopwords");

}

// 加载远程停用词典

List<String> remoteExtStopWordDictFiles = getRemoteExtStopWordDictionarys();

for (String location : remoteExtStopWordDictFiles) {

logger.info("[Dict Loading] " + location);

List<String> lists = getRemoteWords(location);

// 如果找不到扩展的字典，则忽略

if (lists == null) {

logger.error("[Dict Loading] " + location + " load failed");

continue;

}

for (String theWord : lists) {

if (theWord != null && !"".equals(theWord.trim())) {

// 加载远程词典数据到主内存中

logger.info(theWord);

_StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());

}

// 加载Mysql停用词词库

this.loadMySqlStopwordDict();

}

复制代码

最后在initial()方法中启动更新线程

复制代码

/**

* 词典初始化由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化

* 只有当Dictionary类被实际调用时，才会开始载入词典，这将延长首次分词操作的时间该方法提供了一个在应用加载阶段就初始化字典的手段

* @return Dictionary

public static synchronized void initial(Configuration cfg) {

if (singleton == null) {

synchronized (Dictionary.class) {

if (singleton == null) {

singleton = new Dictionary(cfg);

singleton.loadMainDict();

singleton.loadSurnameDict();

singleton.loadQuantifierDict();

singleton.loadSuffixDict();

singleton.loadPrepDict();

singleton.loadStopWordDict();

// 执行更新mysql词库的线程

new Thread(new HotDicReloadThread()).start();

if(cfg.isEnableRemoteDict()){

// 建立监控线程

for (String location : singleton.getRemoteExtDictionarys()) {

// 10 秒是初始延迟可以修改的 60是间隔时间单位秒

pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);

}

for (String location : singleton.getRemoteExtStopWordDictionarys()) {

pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);

}

复制代码

然后,修改 src/main/assemblies/plugin.xml 文件,在 dependencySet 的 includes 中加入 MySQL 驱动依赖

复制代码

<include>mysql:mysql-connector-java</include>

</includes>

</dependencySet>

复制代码

源码到此修改完成，在自己的数据库中创建两张新的表

建表SQL

复制代码

-- Hot words. The Java loader reads the result column labelled `word` from the query
-- configured under jdbc.reload.sql.
CREATE TABLE hot_words (

id bigint(20) NOT NULL AUTO_INCREMENT,

word varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '词语',

PRIMARY KEY (id)

) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Stop words.
-- NOTE(review): the Java loader reads a result column labelled `word`, but the column here is
-- named `stopword`. Presumably the query under jdbc.reload.stopword.sql aliases it
-- (e.g. SELECT stopword AS word ...) - confirm.
CREATE TABLE hot_stopwords (

id bigint(20) NOT NULL AUTO_INCREMENT,

stopword varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '停用词',

PRIMARY KEY (id)

) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

复制代码

深圳网站建设www.sz886.com