起因:
- 我们在 gremlin 的配置类中向springboot 分别注入 Neo4JGraph 和 GraphTraversalSource 两个bean
@Configuration
public class GraphDatabaseInitConfig {

    @Value("${spring.data.neo4j.password}")
    private String neo4jPassword;

    @Value("${spring.data.neo4j.username}")
    private String neo4jUsername;

    @Value("${spring.data.neo4j.uri}")
    private String neo4jUri;

    /**
     * Builds the {@link Neo4JGraph} backed by a Bolt {@link Driver}.
     * <p>
     * {@code destroyMethod = "close"} makes Spring call {@code Neo4JGraph.close()}
     * when the context shuts down, releasing the underlying driver and any
     * thread-bound sessions instead of leaking them.
     */
    @Bean(destroyMethod = "close")
    public Neo4JGraph createNeo4jGraph() {
        Driver driver = GraphDatabase.driver(neo4jUri, AuthTokens.basic(neo4jUsername, neo4jPassword));
        Neo4JNativeElementIdProvider vertexIdProvider = new Neo4JNativeElementIdProvider();
        Neo4JNativeElementIdProvider edgeIdProvider = new Neo4JNativeElementIdProvider();
        return new Neo4JGraph(driver, null, vertexIdProvider, edgeIdProvider);
    }

    /**
     * Exposes the traversal source for the graph.
     * <p>
     * The {@link Neo4JGraph} is injected as a parameter rather than obtained by
     * calling {@code createNeo4jGraph()} directly: a direct call is only safe
     * because of CGLIB proxying of {@code @Configuration} classes, and would
     * silently create a second graph instance under
     * {@code @Configuration(proxyBeanMethods = false)}.
     */
    @Bean
    public GraphTraversalSource createGraphTraversalSource(Neo4JGraph neo4JGraph) {
        return neo4JGraph.traversal();
    }
}
- 在不修改 neo4j 数据的情况下,查询数据一直正常
- 在修改完 neo4j 数据之后,再次查询,刚开始是正确的数据,多查几次后发现数据又变成了修改之前的数据
pom.xml 文件依赖
<dependency>
<groupId>com.steelbridgelabs.oss</groupId>
<artifactId>neo4j-gremlin-bolt</artifactId>
<version>0.4.6</version>
</dependency>
排查过程
- 查看查询的源码,断点跟进,没有发现问题
- 查看 neo4j-gremlin-bolt jar包内的类,打断点跟进发现数据不变的情况下,每次查询 session(Neo4JGraph.currentSession()) 都会重新创建,但是当数据发生变动之后,session 会从之前所有查询历史中选择某一个 session 去进行处理。
- 上面的是假象,其实数据不变的情况下,即使获取到之前的session ,数据也完全一样,查询结果也一样。
原因分析
- Neo4JGraph 的历史 session 存储在 ThreadLocal 的map中,但查询完成后线程没有杀掉。
- springboot 应用中线程来自线程池,会被复用而持续存在;当某次查询跟之前查询恰好由同一个线程执行时,Neo4JGraph.currentSession() 会从 ThreadLocal 的 map 中读取到旧的 session 信息,也就不会再创建新的 session。
解决方案
- 在每次调用查询之后主动关闭
neo4JGraph.close()
- aop 切片,在每次请求之后,执行session 关闭
补充:Neo4JGraph 的查询流程示例如下:
graphTraversalSource.V().has(LabelNameConstant.EQUIP, "name", equipTypeName).toList()
V() 和 has() 会记录成 step
toList 之后才会真正触发查询
this.fill(new ArrayList<>())
Traversal.class
while (true) {
final Traverser<E> traverser = endStep.next();
TraversalHelper.addToCollection(collection, traverser.get(), traverser.bulk());
}
GraphStep.class
this.iteratorSupplier = () -> (Iterator<E>) (Vertex.class.isAssignableFrom(this.returnClass) ?
this.getTraversal().getGraph().get().vertices(this.ids) :
this.getTraversal().getGraph().get().edges(this.ids));
this.getTraversal().getGraph().get().vertices(this.ids)
Neo4JGraph.class
// NOTE(review): excerpt quoted from neo4j-gremlin-bolt's Neo4JGraph; `transaction` is a
// field of the enclosing class (not visible in this excerpt).
@Override
public Iterator<Vertex> vertices(Object... ids) {
// get the session bound to the current thread (stored in a ThreadLocal map) — a reused
// pooled thread can therefore receive an old session and serve stale data
Neo4JSession session = currentSession();
// transaction should be ready for io operations
transaction.readWrite();
// find vertices: only the session's first query issues a MATCH against the database;
// subsequent calls on the same session may answer from its in-memory cache
return session.vertices(ids);
}
Neo4JSession.class
// NOTE(review): excerpt quoted from neo4j-gremlin-bolt's Neo4JSession. Resolves vertices
// either from this session's in-memory caches (`vertices`, `transientVertexIndex`,
// `transientVertices`) or, on the first load, from the database — which is why a long-lived
// session keeps returning pre-modification data.
public Iterator<Vertex> vertices(Object[] ids) {
Objects.requireNonNull(ids, "ids cannot be null");
// verify identifiers
verifyIdentifiers(Vertex.class, ids);
// check we have all vertices already loaded — i.e. whether this is the session's first full query
if (!verticesLoaded) {
// check ids
if (ids.length > 0) {
// parameters as a stream
Set<Object> identifiers = Arrays.stream(ids).map(id -> processIdentifier(vertexIdProvider, id)).collect(Collectors.toSet());
// filter ids, remove ids already in memory (only ids that might exist on server)
List<Object> filter = identifiers.stream().filter(id -> !vertices.containsKey(id) && !transientVertexIndex.containsKey(id)).collect(Collectors.toList());
// check we need to execute statement in server
if (!filter.isEmpty()) {
// vertex match predicate
String predicate = partition.vertexMatchPredicate("n");
// change operator on single id filtering (performance optimization)
if (filter.size() == 1) {
// execute statement
Result result = executeStatement("MATCH " + generateVertexMatchPattern("n") + " WHERE " + vertexIdProvider.matchPredicateOperand("n") + " = $id" + (predicate != null ? " AND " + predicate : "") + " RETURN n", Collections.singletonMap("id", filter.get(0)));
// create stream from query
Stream<Vertex> query = vertices(result);
// combine stream from memory and query result
Iterator<Vertex> iterator = combine(Stream.concat(identifiers.stream().filter(vertices::containsKey).map(id -> (Vertex)vertices.get(id)), identifiers.stream().filter(transientVertexIndex::containsKey).map(id -> (Vertex)transientVertexIndex.get(id))), query);
// process summary (query has been already consumed by combine)
ResultSummaryLogger.log(result.consume());
// return iterator
return iterator;
}
// execute statement
Result result = executeStatement("MATCH " + generateVertexMatchPattern("n") + " WHERE " + vertexIdProvider.matchPredicateOperand("n") + " IN $ids" + (predicate != null ? " AND " + predicate : "") + " RETURN n", Collections.singletonMap("ids", filter));
// create stream from query
Stream<Vertex> query = vertices(result);
// combine stream from memory and query result
Iterator<Vertex> iterator = combine(Stream.concat(identifiers.stream().filter(vertices::containsKey).map(id -> (Vertex)vertices.get(id)), identifiers.stream().filter(transientVertexIndex::containsKey).map(id -> (Vertex)transientVertexIndex.get(id))), query);
// process summary (query has been already consumed by combine)
ResultSummaryLogger.log(result.consume());
// return iterator
return iterator;
}
// no need to execute query, only items in memory
return combine(identifiers.stream().filter(vertices::containsKey).map(id -> (Vertex)vertices.get(id)), identifiers.stream().filter(transientVertexIndex::containsKey).map(id -> (Vertex)transientVertexIndex.get(id)));
}
// vertex match predicate
String predicate = partition.vertexMatchPredicate("n");
// execute statement
Result result = executeStatement("MATCH " + generateVertexMatchPattern("n") + (predicate != null ? " WHERE " + predicate : "") + " RETURN n", Collections.emptyMap());
// create stream from query
Stream<Vertex> query = vertices(result);
// combine stream from memory (transient) and query result
Iterator<Vertex> iterator = combine(transientVertices.stream().map(vertex -> (Vertex)vertex), query);
// process summary (query has been already consumed by combine)
ResultSummaryLogger.log(result.consume());
// it is safe to update loaded flag at this time
verticesLoaded = true;
// return iterator
return iterator;
}
// check ids
if (ids.length > 0) {
// parameters as a stream (set to remove duplicated ids)
Set<Object> identifiers = Arrays.stream(ids).map(id -> processIdentifier(vertexIdProvider, id)).collect(Collectors.toSet());
// no need to execute query, only items in memory
return combine(identifiers.stream().filter(vertices::containsKey).map(id -> (Vertex)vertices.get(id)), identifiers.stream().filter(transientVertexIndex::containsKey).map(id -> (Vertex)transientVertexIndex.get(id)));
}
// no need to execute query, all items in memory — once verticesLoaded is set, every
// subsequent call is answered entirely from the session's in-memory caches
return combine(transientVertices.stream().map(vertex -> (Vertex)vertex), vertices.values().stream().map(vertex -> (Vertex)vertex));
}