package org.wltea.analyzer.dic;
import org.wltea.analyzer.help.ESPluginLoggerFactory;
/**
 * Background task that reloads the dictionaries over and over by calling
 * {@code Dictionary.getSingleton().reLoadMainDict()}.
 *
 * <p>The loop itself contains no delay between successful reloads; any pacing
 * has to come from the reload call. NOTE(review): presumably the MySQL loaders
 * reached through {@code reLoadMainDict()} sleep for the configured interval —
 * confirm against Dictionary.
 */
public class HotDicReloadThread implements Runnable {

    /** Pause after a failed reload so a persistent error cannot turn into a hot error loop. */
    private static final long FAILURE_BACKOFF_MS = 1000L;

    // Register the logger under this class's own name (it was registered under Dictionary's).
    private static final org.apache.logging.log4j.Logger logger =
            ESPluginLoggerFactory.getLogger(HotDicReloadThread.class.getName());

    /**
     * Reloads the dictionaries until the running thread is interrupted.
     * An unchecked exception from a single reload is logged and the loop
     * continues, instead of silently ending hot reloading for good.
     */
    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            try {
                logger.info("-------重新加载mysql词典--------");
                Dictionary.getSingleton().reLoadMainDict();
            } catch (RuntimeException e) {
                logger.error("dictionary reload failed, will retry", e);
                try {
                    Thread.sleep(FAILURE_BACKOFF_MS);
                } catch (InterruptedException interrupted) {
                    // Restore the flag so the loop condition sees it and exits.
                    Thread.currentThread().interrupt();
                }
            }
        }
    }
}
复制代码
修改org.wltea.analyzer.dic文件夹下的Dictionary
在Dictionary类中加载mysql驱动类
复制代码
// Holds the settings read from jdbc-reload.properties by the MySQL loaders.
private static Properties prop = new Properties();

static {
    // Load the MySQL JDBC driver class up front. Connector/J 8+ ships the driver as
    // com.mysql.cj.jdbc.Driver; the legacy com.mysql.jdbc.Driver name is kept as a
    // fallback so older 5.x connector jars keep working.
    String[] driverClassNames = {"com.mysql.cj.jdbc.Driver", "com.mysql.jdbc.Driver"};
    ClassNotFoundException lastFailure = null;
    boolean driverLoaded = false;
    for (String driverClassName : driverClassNames) {
        try {
            Class.forName(driverClassName);
            driverLoaded = true;
            break;
        } catch (ClassNotFoundException e) {
            lastFailure = e;
        }
    }
    if (!driverLoaded) {
        // Neither driver class is on the classpath; later getConnection calls will fail.
        logger.error("error", lastFailure);
    }
}
复制代码
接着,创建从MySQL中加载词典的方法
复制代码
/**
 * Loads hot-update words from MySQL into the main dictionary.
 *
 * <p>Reads {@code jdbc-reload.properties} from the dictionary root, opens a
 * connection with {@code jdbc.url} / {@code jdbc.user} / {@code jdbc.password},
 * runs the query configured as {@code jdbc.reload.sql} and adds the {@code word}
 * column of every row to {@code _MainDict}.
 *
 * <p>This is best-effort: any failure is logged and the method returns normally,
 * leaving the words loaded so far in place.
 */
private void loadMySqlExtDict() {
    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        // Close the stream once the properties are read; it used to be leaked on every reload.
        try (FileInputStream in = new FileInputStream(file.toFile())) {
            prop.load(in);
        }

        logger.info("-------jdbc-reload.properties-------");
        for (Object key : prop.keySet()) {
            String name = String.valueOf(key);
            // Never write the database password to the log.
            String shown = "jdbc.password".equals(name) ? "******" : prop.getProperty(name);
            logger.info("{}={}", name, shown);
        }

        String sql = prop.getProperty("jdbc.reload.sql");
        logger.info("------- 查询词典, sql:{}-------", sql);

        // try-with-resources closes result set, statement and connection in reverse
        // order, each one even if closing an earlier one throws.
        try (Connection connection = DriverManager.getConnection(
                     prop.getProperty("jdbc.url"),
                     prop.getProperty("jdbc.user"),
                     prop.getProperty("jdbc.password"));
             Statement statement = connection.createStatement();
             ResultSet resultSet = statement.executeQuery(sql)) {
            while (resultSet.next()) {
                String theWord = resultSet.getString("word");
                // A NULL or blank row must not abort the rest of the load.
                if (theWord == null || theWord.trim().isEmpty()) {
                    continue;
                }
                logger.info("------热更新词典:{}------", theWord);
                _MainDict.fillSegment(theWord.trim().toCharArray());
            }
        }
    } catch (Exception e) {
        // Broad catch on purpose: a broken MySQL setup must not break dictionary loading.
        logger.error("failed to load hot-update words from MySQL", e);
    }
}
复制代码
接着,创建加载停用词词典方法
复制代码
/** Pause used when {@code jdbc.reload.interval} is missing, malformed or negative. */
private static final long MYSQL_RELOAD_DEFAULT_INTERVAL_MS = 60_000L;

/**
 * Loads stop words from MySQL into the stop-word dictionary, then pauses for
 * the configured reload interval.
 *
 * <p>Reads {@code jdbc-reload.properties} from the dictionary root, runs the
 * query configured as {@code jdbc.reload.stopword.sql} and adds the
 * {@code word} column of every row to {@code _StopWords}.
 * NOTE(review): the result column must be named (or aliased to) {@code word} —
 * confirm the configured SQL matches the table definition.
 *
 * <p>The pause now happens after the JDBC resources are closed, and it happens
 * whether or not loading succeeded, so a failing database cannot make a caller
 * that loops over this method spin without delay. Failures are logged and the
 * method returns normally.
 */
private void loadMySqlStopwordDict() {
    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        // Close the stream once the properties are read; it used to be leaked on every reload.
        try (FileInputStream in = new FileInputStream(file.toFile())) {
            prop.load(in);
        }

        logger.info("-------jdbc-reload.properties-------");
        for (Object key : prop.keySet()) {
            String name = String.valueOf(key);
            // Never write the database password to the log.
            String shown = "jdbc.password".equals(name) ? "******" : prop.getProperty(name);
            logger.info("-------{}={}", name, shown);
        }

        String sql = prop.getProperty("jdbc.reload.stopword.sql");
        logger.info("-------查询停用词, sql:{}", sql);

        // try-with-resources closes result set, statement and connection in reverse
        // order, each one even if closing an earlier one throws.
        try (Connection conn = DriverManager.getConnection(
                     prop.getProperty("jdbc.url"),
                     prop.getProperty("jdbc.user"),
                     prop.getProperty("jdbc.password"));
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(sql)) {
            while (rs.next()) {
                String theWord = rs.getString("word");
                // A NULL or blank row must not abort the rest of the load.
                if (theWord == null || theWord.trim().isEmpty()) {
                    continue;
                }
                logger.info("------- 加载停用词 : {}", theWord);
                _StopWords.fillSegment(theWord.trim().toCharArray());
            }
        }
    } catch (Exception e) {
        // Broad catch on purpose: a broken MySQL setup must not break dictionary loading.
        logger.error("failed to load stop words from MySQL", e);
    }

    // The connection is closed by now, so it is not held open during the pause.
    pauseForMySqlReloadInterval();
}

/**
 * Sleeps for {@code jdbc.reload.interval} milliseconds, falling back to
 * {@link #MYSQL_RELOAD_DEFAULT_INTERVAL_MS} when the setting is unusable.
 * An interrupt ends the pause early and is re-asserted on the current thread.
 */
private void pauseForMySqlReloadInterval() {
    long intervalMs = MYSQL_RELOAD_DEFAULT_INTERVAL_MS;
    String configured = prop.getProperty("jdbc.reload.interval");
    if (configured != null) {
        try {
            long parsed = Long.parseLong(configured.trim());
            if (parsed >= 0) {
                intervalMs = parsed;
            }
        } catch (NumberFormatException e) {
            logger.error("invalid jdbc.reload.interval '" + configured + "', using default", e);
        }
    }
    try {
        Thread.sleep(intervalMs);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to whoever drives the reload loop.
        Thread.currentThread().interrupt();
    }
}
复制代码
接下来,分别在loadMainDict()方法和loadStopWordDict()方法的结尾处调用上面新增的两个加载方法
复制代码
/**
 * Loads the main dictionary and every extension source layered on top of it.
 */
private void loadMainDict() {
    // Start over with a fresh dictionary segment
    _MainDict = new DictSegment((char) 0);

    // Bundled main dictionary file
    final Path mainDictPath = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
    loadDictFile(_MainDict, mainDictPath, false, "Main Dict");

    // Local extension dictionaries
    loadExtDict();
    // Remote custom dictionaries
    loadRemoteExtDict();
    // Words kept in MySQL
    loadMySqlExtDict();
}
复制代码
复制代码
/**
 * Loads the stop-word dictionary: the bundled file first, then the local
 * extension files, then the remote word lists, and finally the MySQL words.
 */
private void loadStopWordDict() {
    // Start over with a fresh stop-word segment
    _StopWords = new DictSegment((char) 0);

    // Bundled stop-word file
    Path bundledStopWords = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);
    loadDictFile(_StopWords, bundledStopWords, false, "Main Stopwords");

    // Local extension stop-word files (the list may be absent)
    List<String> localExtras = getExtStopWordDictionarys();
    if (localExtras != null) {
        for (String dictName : localExtras) {
            logger.info("[Dict Loading] " + dictName);
            Path extraPath = PathUtils.get(dictName);
            loadDictFile(_StopWords, extraPath, false, "Extra Stopwords");
        }
    }

    // Remote stop-word lists
    List<String> remoteLocations = getRemoteExtStopWordDictionarys();
    for (String location : remoteLocations) {
        logger.info("[Dict Loading] " + location);
        List<String> remoteWords = getRemoteWords(location);
        if (remoteWords == null) {
            // Nothing could be fetched from this location: report it and move on
            logger.error("[Dict Loading] " + location + " load failed");
            continue;
        }
        for (String candidate : remoteWords) {
            if (candidate == null) {
                continue;
            }
            String trimmed = candidate.trim();
            if ("".equals(trimmed)) {
                continue;
            }
            // Remote words are stored lower-cased
            logger.info(candidate);
            _StopWords.fillSegment(trimmed.toLowerCase().toCharArray());
        }
    }

    // Stop words kept in MySQL
    loadMySqlStopwordDict();
}
复制代码
最后在initial()方法中启动更新线程
复制代码
/**
 * Initializes the dictionary singleton once; later calls are no-ops.
 *
 * <p>Dictionaries would otherwise only be loaded when the Dictionary class is
 * first used, which lengthens the first analysis call. Calling this method
 * lets the application load them during start-up instead.
 *
 * <p>Besides loading all dictionaries, the first call starts the MySQL reload
 * thread and, when remote dictionaries are enabled in {@code cfg}, schedules a
 * {@code Monitor} for every remote dictionary location.
 *
 * @param cfg configuration used to build the Dictionary and to decide whether
 *            remote dictionary monitoring is scheduled
 */
public static synchronized void initial(Configuration cfg) {
    // The method is static synchronized, so a nested lock on the same class
    // monitor added nothing; a single guard is enough.
    if (singleton != null) {
        return;
    }

    singleton = new Dictionary(cfg);
    singleton.loadMainDict();
    singleton.loadSurnameDict();
    singleton.loadQuantifierDict();
    singleton.loadSuffixDict();
    singleton.loadPrepDict();
    singleton.loadStopWordDict();

    // Start the thread that keeps refreshing the dictionaries from MySQL.
    // It is a named daemon so it shows up clearly in thread dumps and never
    // keeps the JVM alive on shutdown.
    Thread mysqlReloader = new Thread(new HotDicReloadThread(), "ik-mysql-dict-reload");
    mysqlReloader.setDaemon(true);
    mysqlReloader.start();

    if (cfg.isEnableRemoteDict()) {
        // Monitoring tasks: 10 s initial delay, then every 60 s (both adjustable)
        for (String location : singleton.getRemoteExtDictionarys()) {
            pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
        }
        for (String location : singleton.getRemoteExtStopWordDictionarys()) {
            pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
        }
    }
}
复制代码
然后,修改src/main/assemblies/plugin.xml文件,加入MySQL驱动的依赖配置
复制代码
<!-- Adds the MySQL JDBC driver artifact (mysql:mysql-connector-java) to the
     assembly, placed in the root output directory ("/"). -->
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<includes>
<include>mysql:mysql-connector-java</include>
</includes>
</dependencySet>
复制代码
源码到此修改完成,在自己的数据库中创建两张新的表
建表SQL
复制代码
-- Hot-update words table: one word per row, auto-increment primary key.
-- NOTE(review): presumably queried by the statement configured as
-- jdbc.reload.sql, whose result must expose a column named "word" — confirm.
-- The column is nullable (DEFAULT NULL), so rows without a word are possible.
CREATE TABLE hot_words (
id bigint(20) NOT NULL AUTO_INCREMENT,
word varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '词语',
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
-- Hot-update stop words table: one stop word per row, auto-increment primary key.
-- NOTE(review): the column here is named "stopword", while the Java stop-word
-- loader reads the result column "word"; presumably the statement configured as
-- jdbc.reload.stopword.sql aliases it (e.g. "stopword AS word") — confirm.
-- The column is nullable (DEFAULT NULL), so rows without a word are possible.
CREATE TABLE hot_stopwords (
id bigint(20) NOT NULL AUTO_INCREMENT,
stopword varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '停用词',
PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
复制代码
深圳网站建设www.sz886.com