1515 */
1616class TrieTree {
1717 protected $ nodeTree = [];
18- protected $ count = 0 ;
1918
2019 /**
2120 * 构造
@@ -27,10 +26,15 @@ public function __construct() {
2726
2827 /**
2928 * 从树中摘除一个文本
30- * @param $str
29+ * @param $index_str
3130 */
32- public function delete ($ str , $ deltree = false ) {
33- $ str = trim ($ str );
31+ public function delete ($ index_str , $ deltree = false , $ is_py = false , $ chinese = "" ) {
32+ $ str = trim ($ index_str );
33+ $ chinese = trim ($ chinese );
34+ if ($ is_py && empty ($ chinese )) {
35+ return false ;
36+ }
37+
3438 $ delstr_arr = $ this ->convertStrToH ($ str );
3539 $ len = count ($ delstr_arr );
3640 //提取树
@@ -64,10 +68,33 @@ public function delete($str, $deltree = false) {
6468 }
6569 //只有一个字 直接删除
6670 if ($ idx == 0 ) {
67- if (count ($ del_index [$ idx ]['index ' ]['child ' ]) == 0 ) {
68- unset($ this ->nodeTree [$ del_index [$ idx ]['code ' ]]);
69- return true ;
71+ //如果是拼音 只删除相应的拼音索引
72+ if ($ is_py ) {
73+ //清除单个拼音索引
74+ if (isset ($ this ->nodeTree [$ del_index [$ idx ]['code ' ]]['chinese_list ' ])) {
75+ $ is_del = false ;
76+ foreach ($ this ->nodeTree [$ del_index [$ idx ]['code ' ]]['chinese_list ' ] as $ key =>$ node ) {
77+ if ($ node ['word ' ] == $ chinese ){
78+ unset($ this ->nodeTree [$ del_index [$ idx ]['code ' ]]['chinese_list ' ][$ key ]);
79+ $ is_del = true ;
80+ break ;
81+ }
82+ }
83+ if ($ is_del && 0 != count ($ this ->nodeTree [$ del_index [$ idx ]['code ' ]]['chinese_list ' ])){
84+ return true ;
85+ }
86+ if (!$ is_del ){
87+ return false ;
88+ }
89+ //如果依然存在中文数据 则继续向下跑删除节点
90+ }
91+ }else {
92+ if (count ($ del_index [$ idx ]['index ' ]['child ' ]) == 0 ) {
93+ unset($ this ->nodeTree [$ del_index [$ idx ]['code ' ]]);
94+ return true ;
95+ }
7096 }
97+
7198 }
7299 //末梢为关键词结尾,且存在子集 清除结尾标签
73100 if (count ($ del_index [$ idx ]['index ' ]['child ' ]) > 0 ) {
@@ -98,12 +125,17 @@ public function delete($str, $deltree = false) {
98125 /**
99126 * ADD word [UTF8]
100127 * 增加新特性,在质感末梢增加自定义数组
101- * @param $str 添加的词
128+ * @param $index_str 添加的词
102129 * @param array $data 添加词的附加属性
103130 * @return $this
104131 */
105- public function append ($ str , $ data = array ()) {
106- $ str = trim ($ str );
132+ public function append ($ index_str , $ data = array (), $ is_py = false , $ chinese = '' ) {
133+ $ str = trim ($ index_str );
134+ $ chinese = trim ($ chinese );
135+ if ($ is_py && empty ($ chinese )) {
136+ return false ;
137+ }
138+
107139 $ childTree = &$ this ->nodeTree ;
108140 $ len = strlen ($ str );
109141 for ($ i = 0 ; $ i < $ len ; $ i ++) {
@@ -137,19 +169,16 @@ public function append($str, $data = array()) {
137169 }
138170 if ($ i == ($ len - 1 )) {
139171 $ is_end = true ;
172+ if ($ is_py ) {
173+ $ str = $ chinese ;
174+ }
140175 }
141- $ childTree = &$ this ->_appendWordToTree ($ childTree , $ code , $ word , $ is_end , $ data , $ str );
142-
176+ $ childTree = &$ this ->_appendWordToTree ($ childTree , $ code , $ word , $ is_end , $ data , $ str , $ is_py );
143177 }
144- $ this ->count ++;
145178 unset($ childTree );
146179 return $ this ;
147180 }
148181
149- public function getCount () {
150- return $ this ->count ;
151- }
152-
/**
 * Insert a single character node into one level of the tree and return
 * that node's child map by reference, so the caller can keep descending.
 *
 * The node is created on first use with the keys 'end', 'child' and 'value'.
 * When $end is truthy the node is additionally marked as a terminal node and
 * receives its payload:
 *  - pinyin mode ($is_py truthy): the pair {word: $full_str, data: $data} is
 *    kept in the node's 'chinese_list'. An entry whose word is identical to
 *    $full_str has its data overwritten; otherwise a new entry is appended,
 *    so one pinyin index can map to several words.
 *  - normal mode: 'full' and 'data' are written directly on the node.
 *
 * @param array  $tree     tree level to insert into (modified in place)
 * @param mixed  $code     array key identifying the character on this level
 * @param mixed  $word     stored under 'value' when the node is created
 * @param bool   $end      whether this character terminates an indexed string
 * @param array  $data     custom payload attached to the terminal node
 * @param string $full_str word recorded on the terminal node; append() passes
 *                         the Chinese word here when indexing by pinyin
 * @param bool   $is_py    whether the indexed string is a pinyin index
 * @return array reference to the node's 'child' array
 */
private function &_appendWordToTree(&$tree, $code, $word, $end = false, $data = array(), $full_str = '', $is_py = false) {
    if (!isset($tree[$code])) {
        $tree[$code] = array(
            'end' => $end,
            'child' => array(),
            'value' => $word,
        );
    }

    if ($end) {
        $tree[$code]['end'] = true;
        $tree[$code]['is_py'] = $is_py;

        if ($is_py) {
            // Locate an existing entry for the same word. The comparison is
            // strict on purpose: with a loose "==" numeric-looking strings such
            // as "1000" and "1e3" would be treated as the same word.
            $existing_key = null;
            if (isset($tree[$code]['chinese_list'])) {
                foreach ($tree[$code]['chinese_list'] as $key => $entry) {
                    if ((string)$entry['word'] === (string)$full_str) {
                        $existing_key = $key;
                        break;
                    }
                }
            }

            if ($existing_key !== null) {
                // Same word indexed again: only refresh its payload.
                $tree[$code]['chinese_list'][$existing_key]['data'] = $data;
            } else {
                $tree[$code]['chinese_list'][] = array('word' => $full_str, 'data' => $data);
            }
        } else {
            $tree[$code]['full'] = $full_str;
            $tree[$code]['data'] = $data;
        }
    }

    return $tree[$code]['child'];
}
@@ -185,13 +232,19 @@ public function getTree() {
185232 return $ this ->nodeTree ;
186233 }
187234
188- public function getTreeWord ($ word , $ count = 0 ) {
235+ /**
236+ * 匹配下面的全部词语
237+ * @param $word
238+ * @param int $deep 检索深度 检索之后的词语数量可能会大于这个数字
239+ * @return array|bool
240+ */
241+ public function getTreeWord ($ word , $ deep = 0 ) {
189242 $ search = trim ($ word );
190243 if (empty ($ search )) {
191244 return false ;
192245 }
193- if ( $ count ===0 ) {
194- $ count = 9999 ;
246+ if ( $ deep === 0 ) {
247+ $ deep = 999 ;
195248 }
196249
197250 $ word_keys = $ this ->convertStrToH ($ search );
@@ -202,27 +255,35 @@ public function getTreeWord($word, $count = 0) {
202255 if (isset ($ tree [$ val ])) {
203256 //检测当前词语是否已命中
204257 if ($ key == $ key_count - 1 && $ tree [$ val ]['end ' ] == true ) {
205- $ words [] = ["word " => $ tree [$ val ]['full ' ], "data " => $ tree [$ val ]['data ' ]];
258+ if (isset ($ tree [$ val ]['chinese_list ' ])) {
259+ $ words = array_merge ($ words , $ tree [$ val ]['chinese_list ' ]);
260+ } else {
261+ $ words [] = ["word " => $ tree [$ val ]['full ' ], "data " => $ tree [$ val ]['data ' ]];
262+ }
206263 }
207264 $ tree = &$ tree [$ val ]["child " ];
208- }else {
265+ } else {
209266 //第一个字符都没有命中
210- if ($ key == 0 ){
267+ if ($ key == 0 ) {
211268 return [];
212269 }
213270 }
214271 }
215- $ this ->_getTreeWord ($ tree , $ count , $ words );
272+ $ this ->_getTreeWord ($ tree , $ deep , $ words );
216273 return $ words ;
217274 }
218275
/**
 * Walk a set of sibling nodes and collect the entries stored on every
 * terminal node into $words, descending into children recursively.
 *
 * A terminal node that carries a 'chinese_list' contributes all entries of
 * that list; any other terminal node contributes one {word, data} pair built
 * from its 'full' and 'data' keys.
 *
 * $deep only gates the descent: children are visited while the number of
 * collected words does not exceed it, so the final result may hold more
 * entries than $deep.
 *
 * @param array $child sibling nodes to scan
 * @param int   $deep  threshold on collected words that stops further descent
 * @param array $words accumulator, filled in place
 * @return void
 */
private function _getTreeWord(&$child, $deep, &$words = array()) {
    foreach ($child as $branch) {
        if ($branch['end'] == true) {
            if (!isset($branch['chinese_list'])) {
                $words[] = ["word" => $branch['full'], "data" => $branch['data']];
            } else {
                $words = array_merge($words, $branch['chinese_list']);
            }
        }

        // Leaf node, or enough words gathered already: do not go deeper.
        if (empty($branch['child']) || $deep < count($words)) {
            continue;
        }

        $this->_getTreeWord($branch['child'], $deep, $words);
    }
}
0 commit comments