Skip to content

Commit 3df3e79

Browse files
committed
refactoring
1 parent 8207f66 commit 3df3e79

File tree

11 files changed

+195
-31
lines changed

11 files changed

+195
-31
lines changed

README.md

Lines changed: 129 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,15 @@ NLP Tasks Available through Microsoft Labs API:
2020
```bash
2121
composer require web64/php-nlp-client
2222
```
23+
### Core NLP (Java)
2324

25+
Download the CoreNLP server here: https://stanfordnlp.github.io/CoreNLP/index.html#download
2426

27+
```bash
28+
# Run the server using all jars in the current directory (e.g., the CoreNLP home directory)
29+
java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
30+
```
31+
More info about running the CoreNLP Server: https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
2532
## Usage
2633

2734
### Language detection:
@@ -50,7 +57,7 @@ Array
5057
[meta_data] => Array()
5158
[meta_description] => GitHub is where people build software. More than 27 million people use GitHub to discover, fork, and contribute to over 80 million projects.
5259
[meta_lang] => en
53-
[source_url] => ://
60+
[source_url] =>
5461
[text] => NLP Server. Python Flask web service for easy access to multilingual NLP tasks such as language detection, article extraction...
5562
[title] => web64/nlpserver: NLP Web Service
5663
[top_image] => https://avatars2.githubusercontent.com/u/76733?s=400&v=4
@@ -66,10 +73,10 @@ $entities = $polyglot->getEntities();
6673
$sentiment = $polyglot->getSentiment();
6774
```
6875

69-
### Embeddings - Neighbouring words
76+
### Neighbouring words (Embeddings)
7077
```php
7178
$nlp = new \Web64\Nlp\NlpClient('http://localhost:6400/');
72-
$neighbours = $nlp->embeddings('obama', 'en');
79+
$neighbours = $nlp->neighbours('obama', 'en');
7380
/*
7481
Array
7582
(
@@ -87,4 +94,123 @@ Array
8794
*/
8895
```
8996

97+
### Spacy Entities
98+
```php
99+
$text = "Harvesters is a 1905 oil painting on canvas by the Danish artist Anna Ancher, a member of the artists' community known as the Skagen Painters.";
100+
101+
$nlp = new \Web64\Nlp\NlpClient('http://localhost:6400/');
102+
$entities = $nlp->spacy_entities( $text );
103+
/*
104+
Array
105+
(
106+
[DATE] => Array
107+
(
108+
[0] => 1905
109+
)
110+
111+
[NORP] => Array
112+
(
113+
[0] => Danish
114+
)
115+
116+
[ORG] => Array
117+
(
118+
[0] => the Skagen Painters
119+
)
120+
121+
[PERSON] => Array
122+
(
123+
[0] => Anna Ancher
124+
)
125+
)
126+
*/
127+
```
128+
129+
English is used by default. To use another language, ensure the spaCy model for that language is downloaded and pass the language code as the second parameter:
130+
```php
131+
$entities = $nlp->spacy_entities( $spanish_text, 'es' );
132+
```
133+
134+
135+
### Summarizer
136+
Extract a short summary from a long text:
137+
```php
138+
$summary = $nlp->summarize( $long_text );
139+
```
140+
141+
142+
### Readability
143+
Article extraction using a Python port of Readability.js
144+
145+
```php
146+
$nlp = new \Web64\Nlp\NlpClient( 'http://localhost:6400/' );
147+
148+
// From URL:
149+
$article = $nlp->readabilityUrl('https://github.com/web64/nlpserver');
150+
151+
// From HTML:
152+
$html = file_get_contents( 'https://github.com/web64/nlpserver' );
153+
$article = $nlp->readabilityHTML( $html );
154+
155+
/*
156+
Array
157+
(
158+
[article_html] => <div><h1>NLP Server</h1><p>Python 3 Flask web service for easy access to multilingual NLP tasks ...
159+
[short_title] => web64/nlpserver: NLP Web Service
160+
[text] => NLP Server Python 3 Flask web service for easy access to multilingual NLP tasks such as language detection ...
161+
[title] => GitHub - web64/nlpserver: NLP Web Service
162+
)
163+
*/
164+
```
165+
166+
167+
### Polyglot Entities & Sentiment Analysis
168+
This uses the Polyglot multilingual NLP library to return entities and a sentiment score for given text.
169+
Ensure the models for the required languages are downloaded for Polyglot.
170+
171+
```php
172+
$polyglot = $nlp->polyglot( $text );
173+
174+
// Specify language
175+
$polyglot = $nlp->polyglot( $text, 'no' );
176+
177+
$polyglot->getSentiment(); // -1
178+
179+
$polyglot->getEntityTypes();
180+
/*
181+
Array
182+
(
183+
[Locations] => Array
184+
(
185+
[0] => United Kingdom
186+
)
187+
[Organizations] =>
188+
[Persons] => Array
189+
(
190+
[0] => Ben
191+
[1] => Sir Benjamin Hall
192+
[2] => Benjamin Caunt
193+
)
194+
)
195+
*/
196+
197+
$polyglot->getLocations(); // Array of Locations
198+
$polyglot->getOrganizations(); // Array of organisations
199+
$polyglot->getPersons(); // Array of people
200+
201+
202+
203+
$polyglot->getEntities();
204+
/*
205+
Returns combined array of all entities
206+
Array
207+
(
208+
[0] => Ben
209+
[1] => United Kingdom
210+
[2] => Sir Benjamin Hall
211+
[3] => Benjamin Caunt
212+
)
213+
*/
214+
```
215+
90216

src/Classes/PolyglotResponse.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ public function getEntityTypes()
8989
{
9090
return [
9191
'Locations' => $this->getLocations(),
92-
'Organizations' => $this->geOrganizations(),
92+
'Organizations' => $this->getOrganizations(),
9393
'Persons' => $this->getPersons(),
9494
];
9595
}
@@ -100,7 +100,7 @@ public function getLocations()
100100
return array_keys( $this->data['type_entities']['I-LOC'] );
101101
}
102102

103-
public function geOrganizations()
103+
public function getOrganizations()
104104
{
105105
if ( !empty($this->data['type_entities']['I-ORG']) )
106106
return array_keys( $this->data['type_entities']['I-ORG'] );

src/CoreNlp.php

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,28 @@
1111
*/
1212
class CoreNlp
1313
{
14-
public $api_url = 'http://homestead:9000/';
14+
public $api_url = 'http://localhost:9000/';
15+
public $api_hosts = [];
1516

1617
public $properties = [];
1718
public $data;
1819

20+
function __construct( $hosts, $debug = false )
21+
{
22+
$this->debug = (bool)$debug;
23+
24+
if ( is_array($hosts) )
25+
{
26+
foreach( $hosts as $host )
27+
$this->addHost( $host );
28+
}
29+
else
30+
$this->addHost( $hosts );
31+
32+
// pick random host as default
33+
$this->api_url = $this->api_hosts[ array_rand( $this->api_hosts ) ];
34+
}
35+
1936
public function entities( $text )
2037
{
2138
$this->properties = [

src/NlpClient.php

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class NlpClient{
1111
public $api_url;
1212
public $api_hosts = [];
1313
public $fail_count = 0;
14-
public $debug = false;
14+
public $debug = true;
1515
private $max_retry_count = 3;
1616

1717
function __construct( $hosts, $debug = false )
@@ -47,7 +47,7 @@ public function summarize( $text, $word_count = null )
4747
{
4848
$data = $this->post_call('/summarize', ['text' => $text, 'word_count' => $word_count ] );
4949

50-
return ( !empty($data['summary']) ) ? $data['summary'] : null;
50+
return ( !empty($data['summarize']) ) ? $data['summarize'] : null;
5151
}
5252

5353
/**
@@ -78,7 +78,7 @@ public function readabilityUrl( $url )
7878
{
7979
$data = $this->get_call('/readability', ['url' => $url ] );
8080

81-
return ( !empty($data['data']) ) ? $data['data'] : null;
81+
return ( !empty($data['readability']) ) ? $data['readability'] : null;
8282
}
8383

8484
/**
@@ -88,15 +88,15 @@ public function readabilityHTML( $html )
8888
{
8989
$data = $this->post_call('/readability', ['html' => $html ] );
9090

91-
return ( !empty($data['data']) ) ? $data['data'] : null;
91+
return ( !empty($data['readability']) ) ? $data['readability'] : null;
9292
}
9393

9494
/**
9595
* Get neighbouring words
9696
*/
97-
public function embeddings( $word, $lang = 'en')
97+
public function neighbours( $word, $lang = 'en')
9898
{
99-
$data = $this->get_call('/embeddings', ['word' => $word, 'lang' => $lang ] );
99+
$data = $this->get_call('/neighbours', ['word' => $word, 'lang' => $lang ] );
100100

101101
return ( !empty($data['neighbours']) ) ? $data['neighbours'] : null;
102102
}
@@ -107,7 +107,7 @@ public function embeddings( $word, $lang = 'en')
107107
public function polyglot( $text, $language = null )
108108
{
109109
$data = $this->post_call('/polyglot', ['text' => $text, 'lang' => $language] );
110-
110+
$this->msg( $data );
111111
return new \Web64\Nlp\Classes\PolyglotResponse( $data['polyglot'] );
112112
}
113113

@@ -116,7 +116,7 @@ public function polyglot( $text, $language = null )
116116
*/
117117
public function language( $text )
118118
{
119-
$data = $this->post_call('/language', ['text' => $text] );
119+
$data = $this->post_call('/langid', ['text' => $text] );
120120

121121
if ( isset($data['langid']) && isset($data['langid']['language']))
122122
{
@@ -156,6 +156,10 @@ public function post_call($path, $params, $retry = 0 )
156156
if ( empty($result) || ( isset($http_response_header) && $http_response_header[0] != 'HTTP/1.0 200 OK' ) ) // empty if server is down
157157
{
158158
$this->msg( "Host Failed: {$url}" );
159+
160+
if ( $retry >= $this->max_retry_count )
161+
return null;
162+
159163
$this->chooseHost();
160164
return $this->post_call($path, $params, $retry );
161165
}
@@ -170,16 +174,23 @@ public function get_call($path, $params, $retry = 0)
170174
$url = $this->api_url . $path;
171175

172176
$retry++;
173-
177+
174178
if ( !empty($params) )
175179
$url .= '?' . http_build_query( $params );
176180

177181
$this->msg( "NLP API [GET] $path - $url ");
178182
$result = @file_get_contents( $url, false );
179183

184+
if ( $http_response_header[0] == 'HTTP/1.0 404 NOT FOUND' )
185+
return null;
186+
180187
if ( empty($result) || ( isset($http_response_header) && $http_response_header[0] != 'HTTP/1.0 200 OK' ) ) // empty if server is down
181188
{
182189
$this->msg( "Host Failed: {$url}" );
190+
191+
if ( $retry >= $this->max_retry_count )
192+
return null;
193+
183194
$this->chooseHost();
184195
return $this->get_call($path, $params, $retry );
185196
}
@@ -209,11 +220,10 @@ private function msg( $value )
209220
{
210221
if ( is_array($value) )
211222
{
212-
print_r( $value );
213-
echo PHP_EOL;
223+
fwrite(STDOUT, print_r( $value, true ) . PHP_EOL );
214224
}
215225
else
216-
echo $value . PHP_EOL;
226+
fwrite(STDOUT, $value . PHP_EOL );
217227
}
218228
}
219229

tests/TestCase.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public function setUp()
1515
'http://localhost:6400/',
1616
'http://localhost:6400/',
1717
],
18-
'debug' => false,
18+
'debug' => true,
1919
];
2020
}
2121

tests/Unit/CoreNlpTest.php

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@
33
namespace Tests\Unit;
44

55
use Tests\TestCase;
6-
6+
/**
7+
* java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
8+
* nohup java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000 &
9+
*/
710
class CoreNlpTest extends TestCase
811
{
912
/** @test */
1013
public function test_core_nlp()
1114
{
12-
$corenlp = new \Web64\Nlp\CoreNlp();
15+
$corenlp = new \Web64\Nlp\CoreNlp('http://localhost:9000/');
16+
1317
//echo PHP_EOL. PHP_EOL;
1418
$text = "Catalonia: Ex-police chief Trapero charged with sedition. The former chief of Catalonia's police force, Josep Lluis Trapero, has been charged over events linked with last year's independence referendum.";
1519

@@ -26,7 +30,7 @@ public function test_core_nlp()
2630
";
2731
//echo $text . PHP_EOL. PHP_EOL;
2832
$entities = $corenlp->entities( $text );
29-
//print_r( $entities );
33+
print_r( $entities );
3034

3135

3236
$this->assertNotEmpty( $entities['COUNTRY'] );

tests/Unit/EmbeddingsTest.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ class EmbeddingsTest extends TestCase
1010
public function get_neighbours()
1111
{
1212
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
13+
14+
$neighbours = $nlp->neighbours('obama', 'no');
1315

14-
$neighbours = $nlp->embeddings('obama', 'no');
15-
16-
//$this->msg( $neighbours );
16+
// $this->msg( $neighbours );
1717

1818
$this->assertNotEmpty($neighbours);
1919
}

tests/Unit/PolyglotTest.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,20 @@ public function entity_extraction()
1919
$polyglot = $nlp->polyglot( $text, 'en' );
2020

2121
$this->msg( $polyglot );
22+
$this->msg( $polyglot->getEntities() );
2223

2324
$this->assertNotEmpty( $polyglot->data );
2425
$this->assertArrayHasKey('sentiment', $polyglot->data, "Missing sentiment");
2526
$this->assertNotEmpty( $polyglot->data['entities'] );
2627

2728
$this->assertNotEmpty(
2829
$polyglot->getEntities()
30+
);
31+
32+
$this->assertNotEmpty(
33+
$polyglot->getPersons()
2934
);
35+
3036

3137
$this->assertTrue(
3238
is_numeric($polyglot->getSentiment())

0 commit comments

Comments
 (0)