基于 Trie 实现敏感词过滤
加载外部文件
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| public void init() { try ( InputStream is = this.getClass().getClassLoader().getResourceAsStream("sensitive-words.txt"); BufferedReader reader = new BufferedReader(new InputStreamReader(is)); ) { String keyword; while ((keyword = reader.readLine()) != null) { this.addKeyword(keyword); } } catch (IOException e) { logger.error("加载敏感词文件失败: " + e.getMessage()); } }
|
定义 Trie
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
| private class TrieNode { private boolean isKeywordEnd = false;
private Map<Character, TrieNode> subNodes = new HashMap<>();
public boolean isKeywordEnd() { return isKeywordEnd; }
public void setKeywordEnd(boolean keywordEnd) { isKeywordEnd = keywordEnd; }
public void addSubNode(Character c, TrieNode node) { subNodes.put(c, node); }
public TrieNode getSubNode(Character c) { return subNodes.get(c); } }
|
将一个敏感词添加到前缀树中
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
| private void addKeyword(TrieNode rootNode, String keyword) { TrieNode tempNode = rootNode; for (int i = 0; i < keyword.length(); i++) { char c = keyword.charAt(i); TrieNode subNode = tempNode.getSubNode(c);
if (subNode == null) { subNode = new TrieNode(); tempNode.addSubNode(c, subNode); }
tempNode = subNode;
if (i == keyword.length() - 1) { tempNode.setKeywordEnd(true); } } }
|
过滤敏感词
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
| public String filter(String text) { if (StringUtils.isBlank(text)) { return null; }
TrieNode tempNode = rootNode; int begin = 0; int position = 0; StringBuilder sb = new StringBuilder();
while (position < text.length()) { char c = text.charAt(position);
if (isSymbol(c)) { if (tempNode == rootNode) { sb.append(c); begin++; } position++; continue; }
tempNode = tempNode.getSubNode(c); if (tempNode == null) { sb.append(text.charAt(begin)); position = ++begin; tempNode = rootNode; } else if (tempNode.isKeywordEnd()) { sb.append("****"); begin = ++position; tempNode = rootNode; } else { position++; } }
sb.append(text.substring(begin));
return sb.toString(); }
|
最后更新时间: