Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions API.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def add_suggestions(self, *suggestions, **kwargs)

Add suggestion terms to the AutoCompleter engine. Each suggestion has a score and string.

If kwargs['increment'] is true and the terms are already in the server's dictionary, we increment their scores
If kwargs['increment'] is true and the terms are already in the server's dictionary, we increment their scores


### delete
Expand All @@ -118,7 +118,7 @@ Returns 1 if the string was found and deleted, 0 otherwise
### get\_suggestions
```py

def get_suggestions(self, prefix, fuzzy=False, num=10, with_scores=False)
def get_suggestions(self, prefix, fuzzy=False, num=10, with_scores=False, with_payloads=False)

```

Expand All @@ -132,6 +132,7 @@ Get a list of suggestions from the AutoCompleter, for a given prefix
**NOTE**: Running fuzzy searches on short (<3 letters) prefixes can be very slow, and even scan the entire index.
- **with_scores**: if set to true, we also return the (refactored) score of each suggestion.
This is normally not needed, and is NOT the original score inserted into the index
- **with_payloads**: Return suggestion payloads
- **num**: The maximum number of results we return. Note that we might return less. The algorithm trims irrelevant suggestions.

Returns a list of Suggestion objects. If with_scores was False, the score of all suggestions is 1.
Expand Down Expand Up @@ -205,7 +206,7 @@ Create a new batch indexer from the client with a given chunk size
### create\_index
```py

def create_index(self, fields, no_term_offsets=False, no_field_flags=False, no_score_indexes=False)
def create_index(self, fields, no_term_offsets=False, no_field_flags=False, no_score_indexes=False, stopwords=None)

```

Expand All @@ -219,6 +220,20 @@ Create the search index. Creating an existing index just updates its properties
- **no_term_offsets**: If true, we will not save term offsets in the index
- **no_field_flags**: If true, we will not save field flags that allow searching in specific fields
- **no_score_indexes**: If true, we will not save optimized top score indexes for single word queries
- **stopwords**: If not None, we create the index with this custom stopword list. The list can be empty


### delete\_document
```py

def delete_document(self, doc_id, conn=None)

```



Delete a document from the index.

Returns 1 if the document was deleted, 0 if not.


### drop\_index
Expand Down Expand Up @@ -382,7 +397,7 @@ NumericField is used to define a numeric field in a schema definition
### \_\_init\_\_
```py

def __init__(self, name)
def __init__(self, name, sortable=False)

```

Expand Down Expand Up @@ -550,6 +565,18 @@ def query_string(self)
Return the query string of this query only


### return\_fields
```py

def return_fields(self, *fields)

```



Only return values from these fields


### slop
```py

Expand Down Expand Up @@ -609,7 +636,7 @@ Represents a single suggestion being sent or returned from the auto complete ser
### \_\_init\_\_
```py

def __init__(self, string, score=1.0)
def __init__(self, string, score=1.0, payload=None)

```

Expand All @@ -622,7 +649,7 @@ TextField is used to define a text field in a schema definition
### \_\_init\_\_
```py

def __init__(self, name, weight=1.0)
def __init__(self, name, weight=1.0, sortable=False)

```

Expand Down
67 changes: 51 additions & 16 deletions redisearch/auto_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,58 @@ class Suggestion(object):
"""
Represents a single suggestion being sent or returned from the auto complete server
"""
def __init__(self, string, score=1.0):
def __init__(self, string, score=1.0, payload=None):

self.string = string
self.score = score
self.payload = payload

def __repr__(self):

return self.string


class SuggestionParser(object):
    """
    Internal class used to parse results from the `SUGGET` command.

    The reply is a flat list in which every suggestion occupies 1, 2, or 3
    consecutive values depending on what was requested: the suggestion
    string, followed by its score when `with_scores` is set, followed by
    its payload when `with_payloads` is set.

    ### Parameters

    - **with_scores**: whether each suggestion is followed by its score
    - **with_payloads**: whether each suggestion is followed by its payload
    - **ret**: the flat reply list; None is treated as an empty reply
    """
    def __init__(self, with_scores, with_payloads, ret):
        self.with_scores = with_scores
        self.with_payloads = with_payloads

        # Offsets of the optional values inside one suggestion record.
        # -1 means "not present"; both are always defined so that no code
        # path can hit a missing attribute.
        self._scoreidx = -1
        self._payloadidx = -1

        # The suggestion string itself is always the first value of a record.
        self.sugsize = 1
        if with_scores:
            self._scoreidx = self.sugsize
            self.sugsize += 1
        if with_payloads:
            self._payloadidx = self.sugsize
            self.sugsize += 1

        self._sugs = ret if ret is not None else []

    def __iter__(self):
        """
        Yield one Suggestion per record of the reply.

        The score defaults to 1.0 and the payload to None when they were
        not requested.
        """
        # range() instead of xrange(): xrange does not exist on Python 3,
        # and range() behaves the same for this loop on Python 2.
        for i in range(0, len(self._sugs), self.sugsize):
            ss = self._sugs[i]
            score = float(self._sugs[i + self._scoreidx]) if self.with_scores else 1.0
            payload = self._sugs[i + self._payloadidx] if self.with_payloads else None
            yield Suggestion(ss, score, payload)


class AutoCompleter(object):
"""
A client to RediSearch's AutoCompleter API

It provides prefix searches with optionally fuzzy matching of prefixes
"""

SUGADD_COMMAND = "FT.SUGADD"
SUGDEL_COMMAND = "FT.SUGDEL"
SUGLEN_COMMAND = "FT.SUGLEN"
Expand All @@ -28,6 +64,7 @@ class AutoCompleter(object):
INCR = 'INCR'
WITHSCORES = 'WITHSCORES'
FUZZY = 'FUZZY'
WITHPAYLOADS = 'WITHPAYLOADS'

def __init__(self, key, host='localhost', port=6379, conn = None):
"""
Expand All @@ -44,14 +81,17 @@ def add_suggestions(self, *suggestions, **kwargs):
"""
Add suggestion terms to the AutoCompleter engine. Each suggestion has a score and string.

If kwargs['increment'] is true and the terms are already in the server's dictionary, we increment their scores
If kwargs['increment'] is true and the terms are already in the server's dictionary, we increment their scores
"""
pipe = self.redis.pipeline()
for sug in suggestions:
args = [AutoCompleter.SUGADD_COMMAND, self.key, sug.string, sug.score]
if kwargs.get('increment'):
args.append(AutoCompleter.INCR)

if sug.payload:
args.append('PAYLOAD')
args.append(sug.payload)

pipe.execute_command(*args)

return pipe.execute()[-1]
Expand All @@ -71,7 +111,7 @@ def delete(self, string):
"""
return self.redis.execute_command(AutoCompleter.SUGDEL_COMMAND, self.key, string)

def get_suggestions(self, prefix, fuzzy = False, num = 10, with_scores = False):
def get_suggestions(self, prefix, fuzzy = False, num = 10, with_scores = False, with_payloads=False):
"""
Get a list of suggestions from the AutoCompleter, for a given prefix

Expand All @@ -81,6 +121,7 @@ def get_suggestions(self, prefix, fuzzy = False, num = 10, with_scores = False):
**NOTE**: Running fuzzy searches on short (<3 letters) prefixes can be very slow, and even scan the entire index.
- **with_scores**: if set to true, we also return the (refactored) score of each suggestion.
This is normally not needed, and is NOT the original score inserted into the index
- **with_payloads**: Return suggestion payloads
- **num**: The maximum number of results we return. Note that we might return less. The algorithm trims irrelevant suggestions.

Returns a list of Suggestion objects. If with_scores was False, the score of all suggestions is 1.
Expand All @@ -91,19 +132,13 @@ def get_suggestions(self, prefix, fuzzy = False, num = 10, with_scores = False):
args.append(AutoCompleter.FUZZY)
if with_scores:
args.append(AutoCompleter.WITHSCORES)

if with_payloads:
args.append(AutoCompleter.WITHPAYLOADS)

ret = self.redis.execute_command(*args)
results = []
if not ret:
return results

if with_scores:
# return suggestiongs with scores
return [Suggestion(ret[i], float(ret[i+1])) for i in xrange(0, len(ret), 2)]

# return suggestions without scores
return [Suggestion(s) for s in ret]




parser = SuggestionParser(with_scores, with_payloads, ret)
return [s for s in parser]
13 changes: 13 additions & 0 deletions redisearch/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(self, query_string):
self._ids = None
self._slop = -1
self._in_order = False
self._return_fields = []


def query_string(self):
Expand All @@ -42,6 +43,13 @@ def limit_ids(self, *ids):
self._ids = ids
return self

def return_fields(self, *fields):
    """
    Restrict the values returned for each result to the given fields.

    The field names are stored on the query exactly as passed, and the
    query itself is returned so that calls can be chained.
    """
    self._return_fields = fields
    return self

def slop(self, slop):
"""
Allow a maximum of N intervening non-matched terms between phrase terms (0 means exact phrase)
Expand Down Expand Up @@ -98,6 +106,11 @@ def get_args(self):
if self._in_order:
args.append('INORDER')

if self._return_fields:
args.append('RETURN')
args.append(len(self._return_fields))
args += self._return_fields

args += ["LIMIT", self._offset, self._num]

return args
Expand Down
30 changes: 26 additions & 4 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

from redisearch import *

WILL_PLAY_TEXT = os.path.abspath(os.path.dirname(__file__) + '/will_play_text.csv.bz2')
TITLES_CSV = os.path.abspath(os.path.dirname(__file__) + '/titles.csv')

class RedisSearchTestCase(ModuleTestCase('../module.so')):

def createIndex(self, client, num_docs = 100):
Expand All @@ -28,7 +31,7 @@ def createIndex(self, client, num_docs = 100):

chapters = {}

with bz2.BZ2File('will_play_text.csv.bz2') as fp:
with bz2.BZ2File(WILL_PLAY_TEXT) as fp:

r = csv.reader(fp, delimiter=';')
for n, line in enumerate(r):
Expand Down Expand Up @@ -129,7 +132,11 @@ def testClient(self):
self.assertEqual(len(subset), docs.total)
ids = [x.id for x in docs.docs]
self.assertEqual(set(ids), set(subset))


for doc in client.search(Query('henry king').return_fields('play', 'nonexist')).docs:
self.assertFalse(doc.nonexist)
self.assertTrue(doc.play.startswith('Henry'))

# test slop and in order
self.assertEqual(193, client.search(Query('henry king')).total)
self.assertEqual(3,client.search(Query('henry king').slop(0).in_order()).total)
Expand Down Expand Up @@ -209,7 +216,10 @@ def testStopwords(self):
with conn as r:
# Creating a client with a given index name
client = Client('idx', port=conn.port)
client.drop_index()
try:
client.drop_index()
except:
pass
client.create_index((TextField('txt'),), stopwords = ['foo', 'bar', 'baz'])
client.add_document('doc1', txt = 'foo bar')
client.add_document('doc2', txt = 'hello world')
Expand Down Expand Up @@ -292,7 +302,7 @@ def testAutoComplete(self):

ac = AutoCompleter('ac', conn=r)
n = 0
with open('titles.csv') as f:
with open(TITLES_CSV) as f:
cr = csv.reader(f)

for row in cr:
Expand Down Expand Up @@ -331,6 +341,18 @@ def testAutoComplete(self):
for sug in ret2:
self.assertNotIn(sug.string, strs)

# Test with payload
ac.add_suggestions(Suggestion('pay1', payload='pl1'))
ac.add_suggestions(Suggestion('pay2', payload='pl2'))
ac.add_suggestions(Suggestion('pay3', payload='pl3'))

sugs = ac.get_suggestions('pay', with_payloads=True, with_scores=True)
self.assertEqual(3, len(sugs))
for sug in sugs:
self.assertTrue(sug.payload)
self.assertTrue(sug.payload.startswith('pl'))





Expand Down