Skip to content
12 changes: 12 additions & 0 deletions app/Config/services.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@
// Callback URL for social authentication methods
'callback_url' => env('APP_URL', false),

// LLM Service
// Options: openai
'llm' => env('LLM_SERVICE', ''),

// OpenAI API-compatible service details
'openai' => [
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
'key' => env('OPENAI_KEY', ''),
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
],

'github' => [
'client_id' => env('GITHUB_APP_ID', false),
'client_secret' => env('GITHUB_APP_SECRET', false),
Expand Down
46 changes: 46 additions & 0 deletions app/Console/Commands/RegenerateVectorsCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

namespace BookStack\Console\Commands;

use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Queries\SearchVector;
use BookStack\Search\Queries\StoreEntityVectorsJob;
use Illuminate\Console\Command;

class RegenerateVectorsCommand extends Command
{
    /**
     * Signature used to invoke this command from the console.
     *
     * @var string
     */
    protected $signature = 'bookstack:regenerate-vectors';

    /**
     * Short summary of what this command does.
     *
     * @var string
     */
    protected $description = 'Re-index vectors for all content in the system';

    /**
     * Delete every stored search vector, then dispatch a vector storage
     * job for each entity of each type offered by the entity provider.
     */
    public function handle(EntityProvider $entityProvider)
    {
        // TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
        SearchVector::query()->delete();

        /**
         * Dispatch one storage job per entity within a fetched batch.
         *
         * @param iterable<Entity> $batch
         */
        $dispatchJobsForBatch = function ($batch) {
            foreach ($batch as $item) {
                dispatch(new StoreEntityVectorsJob($item));
            }
        };

        foreach ($entityProvider->all() as $typeName => $model) {
            $this->info("Creating jobs to store vectors for {$typeName} data...");

            // Entities are loaded in batches of 100 rather than all at once.
            $model->newQuery()->chunkById(100, $dispatchJobsForBatch);
        }
    }
}
89 changes: 89 additions & 0 deletions app/Search/Queries/EntityVectorGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?php

declare(strict_types=1);

namespace BookStack\Search\Queries;

use BookStack\Activity\Models\Tag;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Queries\Services\VectorQueryService;
use Illuminate\Support\Facades\DB;

class EntityVectorGenerator
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Rebuild the stored search vectors for the given entity.
     *
     * The entity is converted to plain text, split into chunks, and an
     * embedding is generated for each chunk. Embeddings are generated before
     * the existing rows are removed, so an exception thrown during generation
     * leaves the previously stored vectors for the entity in place.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        $this->deleteExistingEmbeddingsForEntity($entity);
        $this->storeEmbeddings($embeddings, $chunks, $entity);
    }

    /**
     * Remove all stored vector rows which belong to the given entity.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Insert one row per embedding, pairing each embedding with the text
     * chunk found at the same index.
     *
     * @param array<int, array<int, float|int>> $embeddings
     * @param string[]                          $textChunks
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            // The vector is written into a raw SQL expression, so every component
            // is forced to a float first. This ensures that nothing other than
            // numeric text, as received from the remote service, can reach the query.
            $components = array_map(fn ($value): float => (float) $value, $embedding);

            $toInsert[] = [
                'entity_id'   => $entity->id,
                'entity_type' => $entity->getMorphClass(),
                'embedding'   => DB::raw('VEC_FROMTEXT("[' . implode(',', $components) . ']")'),
                'text'        => $textChunks[$index],
            ];
        }

        // Insert in batches of 500 rows to keep each individual query bounded in size.
        foreach (array_chunk($toInsert, 500) as $batch) {
            SearchVector::query()->insert($batch);
        }
    }

    /**
     * Generate an embedding for each of the given text chunks.
     * The returned array uses the same keys as the provided chunks.
     *
     * @param string[] $chunks
     * @return array<int, float[]>
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];
        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }

        return $embeddings;
    }

    /**
     * Split the given text into chunks using the TextChunker.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        return (new TextChunker(500, ["\n", '.', ' ', '']))->chunk($text);
    }

    /**
     * Build the plain text used to represent an entity: its name, then one
     * "name: value" line per tag, then the content of the entity's text field.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $tags = $entity->tags()->get();

        // Double quotes are required here so that tags are separated by a real
        // newline; a single-quoted '\n' is a literal backslash followed by "n".
        $tagText = $tags->map(function (Tag $tag) {
            return $tag->name . ': ' . $tag->value;
        })->join("\n");

        return $entity->name . "\n{$tagText}\n" . $entity->{$entity->textField};
    }
}
26 changes: 26 additions & 0 deletions app/Search/Queries/LlmQueryRunner.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

namespace BookStack\Search\Queries;

use Exception;

class LlmQueryRunner
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
    ) {
    }

    /**
     * Ask the configured LLM service to answer the given query, providing
     * the matched text of each vector search result as context.
     *
     * @param VectorSearchResult[] $vectorResults
     * @throws Exception
     */
    public function run(string $query, array $vectorResults): string
    {
        $service = $this->vectorQueryServiceProvider->get();

        $toMatchText = function (VectorSearchResult $result) {
            return $result->matchText;
        };

        // Re-index so the context is passed as a plain sequential list.
        $contextLines = array_values(array_map($toMatchText, $vectorResults));

        return $service->query($query, $contextLines);
    }
}
61 changes: 61 additions & 0 deletions app/Search/Queries/QueryController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?php

namespace BookStack\Search\Queries;

use BookStack\Http\Controller;
use BookStack\Search\SearchRunner;
use Illuminate\Http\Request;

class QueryController extends Controller
{
    public function __construct(
        protected SearchRunner $searchRunner,
    ) {
        // TODO - Check via testing
        // Guard every action in this controller behind the vector query feature being enabled.
        $this->middleware(function ($incoming, $proceed) {
            $enabled = VectorQueryServiceProvider::isEnabled();
            if (!$enabled) {
                $this->showPermissionError('/');
            }

            return $proceed($incoming);
        });
    }

    /**
     * Render the page from which a vector/LLM-based query can be started,
     * pre-filled with any query text given via the "ask" request parameter.
     */
    public function show(Request $request)
    {
        // TODO - Set page title

        return view('search.query', [
            'query' => $request->get('ask', ''),
        ]);
    }

    /**
     * Run a vector/LLM-based query, responding with an event stream which
     * first provides a rendered list of the matched entities and then
     * provides the text response from the LLM.
     */
    public function run(Request $request, VectorSearchRunner $searchRunner, LlmQueryRunner $llmRunner)
    {
        // TODO - Rate limiting
        $queryText = $request->get('query', '');

        $stream = function () use ($queryText, $searchRunner, $llmRunner) {
            $matches = $queryText ? $searchRunner->run($queryText) : [];

            // Collapse the matches down to one entry per entity, keeping the
            // first-seen instance since many matches can share an entity.
            $uniqueEntities = [];
            foreach ($matches as $found) {
                $matchedEntity = $found->entity;
                $identity = $matchedEntity->getMorphClass() . ':' . $matchedEntity->id;
                $uniqueEntities[$identity] ??= $matchedEntity;
            }

            $listHtml = view('entities.list', ['entities' => $uniqueEntities])->render();
            yield ['view' => $listHtml];

            yield ['result' => $llmRunner->run($queryText, $matches)];
        };

        return response()->eventStream($stream);
    }
}
26 changes: 26 additions & 0 deletions app/Search/Queries/SearchVector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

declare(strict_types=1);

namespace BookStack\Search\Queries;

use BookStack\Permissions\Models\JointPermission;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\HasMany;

/**
* @property string $entity_type
* @property int $entity_id
* @property string $text
* @property string $embedding
*/
class SearchVector extends Model
{
    public $timestamps = false;

    /**
     * Joint permission rows for the entity this vector belongs to,
     * matched on both the entity id and the entity type.
     */
    public function jointPermissions(): HasMany
    {
        $relation = $this->hasMany(JointPermission::class, 'entity_id', 'entity_id');

        // The relation keys only cover the id, so the type is matched via a column comparison.
        return $relation->whereColumn('search_vectors.entity_type', '=', 'joint_permissions.entity_type');
    }
}
66 changes: 66 additions & 0 deletions app/Search/Queries/Services/OpenAiVectorQueryService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?php

namespace BookStack\Search\Queries\Services;

use BookStack\Http\HttpRequestService;

class OpenAiVectorQueryService implements VectorQueryService
{
    protected string $key;
    protected string $endpoint;
    protected string $embeddingModel;
    protected string $queryModel;

    /**
     * @param array $options Service options, read using the keys "key", "endpoint",
     *                       "embedding_model" and "query_model". Missing keys
     *                       fall back to an empty string.
     */
    public function __construct(
        protected array $options,
        protected HttpRequestService $http,
    ) {
        // TODO - Some kind of validation of options
        $this->key = $this->options['key'] ?? '';
        $this->endpoint = $this->options['endpoint'] ?? '';
        $this->embeddingModel = $this->options['embedding_model'] ?? '';
        $this->queryModel = $this->options['query_model'] ?? '';
    }

    /**
     * Send a JSON request to the configured endpoint, authorised using the
     * configured key as a bearer token, and return the decoded JSON response.
     *
     * @throws \RuntimeException If the response body does not decode to a JSON array/object.
     */
    protected function jsonRequest(string $method, string $uri, array $data): array
    {
        // Normalise slashes so the endpoint and URI join with exactly one separator.
        $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
        $client = $this->http->buildClient(30);
        $request = $this->http->jsonRequest($method, $fullUrl, $data)
            ->withHeader('Authorization', 'Bearer ' . $this->key);

        $response = $client->sendRequest($request);
        $decoded = json_decode($response->getBody()->getContents(), true);

        // Returning a non-array here would otherwise fail with an opaque TypeError
        // against the return type, so report what actually went wrong instead.
        if (!is_array($decoded)) {
            $status = $response->getStatusCode();
            throw new \RuntimeException("Unexpected non-JSON response (HTTP status {$status}) for LLM service request to {$uri}");
        }

        return $decoded;
    }

    /**
     * Generate an embedding vector for the given text using the configured embedding model.
     *
     * @return float[]
     * @throws \RuntimeException If the response does not contain an embedding.
     */
    public function generateEmbeddings(string $text): array
    {
        $response = $this->jsonRequest('POST', 'v1/embeddings', [
            'input' => $text,
            'model' => $this->embeddingModel,
        ]);

        $embedding = $response['data'][0]['embedding'] ?? null;
        if (!is_array($embedding)) {
            // Surface an error message from the response where one is present.
            // NOTE(review): assumes errors are reported under error.message - confirm for non-OpenAI services.
            $detail = $response['error']['message'] ?? null;
            $detail = is_string($detail) ? $detail : 'no embedding data in response';
            throw new \RuntimeException("Failed to generate embeddings via LLM service: {$detail}");
        }

        return $embedding;
    }

    /**
     * Ask the configured query model to respond to the given input, providing
     * the context items joined as one item per line.
     * Returns an empty string if the response holds no message content.
     *
     * @param string[] $context
     */
    public function query(string $input, array $context): string
    {
        $formattedContext = implode("\n", $context);

        $response = $this->jsonRequest('POST', 'v1/chat/completions', [
            'model' => $this->queryModel,
            'messages' => [
                [
                    'role' => 'developer',
                    'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
                ],
                [
                    'role' => 'user',
                    'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
                ]
            ],
        ]);

        return $response['choices'][0]['message']['content'] ?? '';
    }
}
21 changes: 21 additions & 0 deletions app/Search/Queries/Services/VectorQueryService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

namespace BookStack\Search\Queries\Services;

/**
 * Contract for a service which can both generate embeddings for text
 * and answer queries via an LLM using locally retrieved context.
 */
interface VectorQueryService
{
/**
 * Generate an embedding vector for the given chunk of text.
 * The result is a single vector: a list of numeric components.
 *
 * @param string $text The chunk of text to generate an embedding for.
 * @return float[]
 */
public function generateEmbeddings(string $text): array;

/**
 * Query the LLM service using the given user input, and
 * relevant context text retrieved locally via a vector search.
 * Returns the response output text from the LLM.
 *
 * @param string   $input   The query text provided by the user.
 * @param string[] $context Pieces of text to provide to the LLM as context.
 */
public function query(string $input, array $context): string;
}
30 changes: 30 additions & 0 deletions app/Search/Queries/StoreEntityVectorsJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

declare(strict_types=1);

namespace BookStack\Search\Queries;

use BookStack\Entities\Models\Entity;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Queue\Queueable;

class StoreEntityVectorsJob implements ShouldQueue
{
    use Queueable;

    /**
     * The entity which this job will generate and store vectors for.
     */
    protected Entity $entity;

    /**
     * Build a job targeting the given entity.
     */
    public function __construct(Entity $entity)
    {
        $this->entity = $entity;
    }

    /**
     * Run the job by handing the entity over to the vector generator.
     */
    public function handle(EntityVectorGenerator $generator): void
    {
        $generator->generateAndStore($this->entity);
    }
}
Loading
Loading