@@ -15,6 +15,7 @@ if (typeof ExtractContentJS == 'undefined') {
1515 if ( typeof ns . RelativeWords . Engine != 'undefined' ) {
1616 return new ns . RelativeWords . Engine [ name ] ;
1717 }
18+ return null ;
1819 }
1920 } ;
2021
@@ -52,12 +53,16 @@ if (typeof ExtractContentJS == 'undefined') {
5253 text : 32768
5354 }
5455 } ;
55- var self = { weight : opt . weight || 0.3 } ;
56+ var self = { weight : opt . weight || 0.4 } ;
5657
5758 self . vote = function ( doc , words ) {
5859 var total = 0 ;
5960 var max = 0 ;
6061 var scores = { } ;
62+ var content = ( doc . content + '' ) . substr ( 0 , opt . limit . text ) ;
63+ content = content . toLowerCase ( ) ;
64+ var title = ( doc . title || '' ) . toLowerCase ( ) ;
65+ var url = ( doc . url || '' ) . toLowerCase ( ) ;
6166 for ( var t in words ) total += words [ t ] . df ;
6267 for ( var t in words ) {
6368 var df = words [ t ] . df ;
@@ -67,20 +72,47 @@ if (typeof ExtractContentJS == 'undefined') {
6772
6873 var tf = 0 ;
6974 var w = t . toLowerCase ( ) ;
70- var text = ( doc . content + '' ) . substr ( 0 , opt . limit . text ) ;
71- tf += Util . countMatchTokenized ( text . toLowerCase ( ) , w ) ;
72- if ( doc . title ) {
73- tf += Util . countMatchTokenized ( doc . title . toLowerCase ( ) , w ) ;
74- }
75- if ( doc . url ) {
76- tf += Util . countMatchTokenized ( doc . url . toLowerCase ( ) , w ) ;
77- }
75+ tf += Util . countMatchTokenized ( content , w ) ;
76+ tf += Util . countMatchTokenized ( title , w ) ;
77+ tf += Util . countMatchTokenized ( url , w ) ;
7878
7979 scores [ t ] = tf / idf ;
8080 if ( scores [ t ] > max ) max = scores [ t ] ;
8181 }
8282 if ( ! max ) return ;
83+ for ( var t in scores ) {
84+ var score = scores [ t ] / max ; // normalize
85+ words [ t ] . score += score * self . weight ;
86+ }
87+ } ;
88+
89+ return self ;
90+ } ;
91+
92+ ns . RelativeWords . Engine . ContentPosition = function ( ) {
93+ var opt = arguments [ 0 ] || {
94+ limit : {
95+ text : 32768
96+ }
97+ } ;
98+ var self = { weight : opt . weight || 0.1 } ;
99+
100+ self . vote = function ( doc , words ) {
101+ var max = 0 ;
102+ var scores = { } ;
103+ var content = ( doc . content + '' ) . substr ( 0 , opt . limit . text ) ;
104+ content = content . toLowerCase ( ) ;
83105 for ( var t in words ) {
106+ var w = t . toLowerCase ( ) ;
107+ var index = Util . indexOfTokenized ( content , w ) ;
108+ if ( index >= 0 ) {
109+ scores [ t ] = scores [ t ] || 0 ;
110+ scores [ t ] += 1.0 / ( index + 1 ) ;
111+ if ( max < scores [ t ] ) max = scores [ t ] ;
112+ }
113+ }
114+ if ( ! max ) return ;
115+ for ( var t in scores ) {
84116 var score = scores [ t ] / max ; // normalize
85117 words [ t ] . score += score * self . weight ;
86118 }
@@ -89,11 +121,46 @@ if (typeof ExtractContentJS == 'undefined') {
89121 return self ;
90122 } ;
91123
/**
 * Scoring engine that gives extra score to candidate words found in the
 * document title.
 *
 * An optional options object may be passed as the first argument:
 *   weight.global - factor applied to each normalized score (default 0.4)
 *   weight.title  - numerator of the position-dependent bonus (default 0.35)
 * A missing or falsy weight falls back to its default.
 *
 * Returns an engine object exposing `weight` and `vote(doc, words)`.
 */
ns.RelativeWords.Engine.TitlePosition = function () {
    var options = arguments[0] || {
        limit: {
            text: 32768
        }
    };

    // Reads one entry of options.weight, falling back when it is absent or falsy.
    var pickWeight = function (key, fallback) {
        return (options.weight && options.weight[key]) || fallback;
    };

    var engine = {
        weight: {
            global: pickWeight('global', 0.4),
            title: pickWeight('title', 0.35)
        }
    };

    /**
     * For every key of `words` that Util.indexOfTokenized locates in the
     * lower-cased title (result >= 0), adds a normalized, weighted value to
     * `words[key].score`. Does nothing when no key is located.
     *
     * @param doc   object whose `title` is read (treated as '' when falsy)
     * @param words object keyed by candidate word; each entry's `score` is updated in place
     */
    engine.vote = function (doc, words) {
        var best = 0;
        var raw = {};
        var loweredTitle = (doc.title || '').toLowerCase();
        var term;

        for (term in words) {
            // NOTE(review): presumably the match position within the title — confirm against Util.
            var position = Util.indexOfTokenized(loweredTitle, term.toLowerCase());
            if (!(position >= 0)) continue;

            // Base value of 1 plus a bonus that shrinks as `position` grows.
            var value = 1 + engine.weight.title / (1 + Math.log(position + 1));
            raw[term] = value;
            if (value > best) best = value;
        }

        if (!best) return;

        for (term in raw) {
            // Normalize against the best raw value before applying the global weight.
            words[term].score += (raw[term] / best) * engine.weight.global;
        }
    };

    return engine;
};
158+
/**
 * Scores the candidate `tags` against a page using the TfIdf,
 * ContentPosition and TitlePosition engines.
 *
 * @param url   page URL, passed to the scorer as `doc.url`
 * @param title page title, passed as `doc.title`
 * @param body  page text, passed as `doc.content`
 * @param tags  candidate words handed to `RelativeWords#top`
 * @returns whatever `RelativeWords#top` returns for this document and tag set
 */
ns.suggestTags = function (url, title, body, tags) {
    var sc = new ns.RelativeWords();
    // Engine names are looked up verbatim as property names on
    // ns.RelativeWords.Engine, so they must match exactly: stray whitespace
    // inside the string makes the lookup miss the registered constructor.
    sc.addEngine(sc.factory.getEngine('TfIdf'));
    sc.addEngine(sc.factory.getEngine('ContentPosition'));
    sc.addEngine(sc.factory.getEngine('TitlePosition'));
    return sc.top({ url: url, title: title, content: body }, tags);
};
99166
0 commit comments