Skip to content

Commit 0544769

Browse files
authored
Merge pull request #5 from moe-mizrak/feat/add-chunk-logic-to-filter-methods
Feat/add chunk logic to filter methods
2 parents ff2b3bb + ffbdb24 commit 0544769

File tree

6 files changed

+200
-82
lines changed

6 files changed

+200
-82
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ You can publish and run the migrations with:
2020
> - [ ] Modify LOG_PATTERN in FileLogReader; check possible patterns in real-world cases so that LOG_PATTERN covers them.
2121
> - [ ] We might end up adding some limits to the file and db log reading (even though it might go against what we are trying to achieve here), so that once the result passes a certain size it does not add any more entries, e.g. while (! feof($handle)) would become something like while (!feof($handle) && $resultCount < $limit); similar logic can also be added to the db log reader.
2222
> - [ ] We might add user_id, request_id and ip_address columns to the config and the logic; at the moment we use the extra column for them, but those fields/columns could be added specifically.
23+
> - [ ] Maybe first get the count/size of the result before doing anything; if it is big, ask the user for more filters, or use streaming etc. So basically first send a count/size request to make sure the result is not too big, and then proceed.
24+
> - [ ] Maybe, to be able to work with big log data, another approach might be: the log_insights table acts as a normalized, summarized, and semantically searchable index over all log sources — optimized for MCP-style natural language queries. This way, even though we use different log mechanisms, we normalize them into a single canonical format so that the MCP tool performs better; also, instead of working with big log data, we have a summarized log table (with an index), so performance should increase drastically. We need a system that periodically adds new log data to the table. Open question: will summarizing the logs lose some information, so that the MCP tool might not find the answer? (Since we store semantic results/summaries, we might miss some answers for prompts like "Show me the exact error messages user 433 got when their login failed." or "What was the stack trace for the 500 errors yesterday?")
2325
2426
## Contributing
2527

src/Readers/DatabaseLogReader.php

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ public function __construct(
2828
$this->searchableColumns = config('laravel-log-reader.db.searchable_columns', []);
2929
}
3030

31+
/**
32+
* {@inheritDoc}
33+
*/
3134
public function search(string $query, bool $chunk = false): array
3235
{
3336
if (empty($query)) {
@@ -40,32 +43,17 @@ public function search(string $query, bool $chunk = false): array
4043
->whereAny($columns, 'like', '%' . $query . '%')
4144
->orderByDesc($this->getColumn(LogTableColumnType::TIMESTAMP->value));
4245

43-
if ($chunk) {
44-
$chunkSize = (int) config('laravel-log-reader.db.chunk_size', 500);
45-
$results = [];
46-
47-
$builder->chunk($chunkSize, function ($chunk) use (&$results) {
48-
$results = array_merge($results, $chunk->all());
49-
});
50-
51-
return $this->convertToLogData($results);
52-
}
53-
54-
return $this->convertToLogData($builder->get()->all());
46+
return $this->executeQuery($builder, $chunk);
5547
}
5648

57-
public function filter(array $filters = []): array
49+
/**
50+
* {@inheritDoc}
51+
*/
52+
public function filter(array $filters = [], bool $chunk = false): array
5853
{
5954
$builder = $this->getQueryBuilder();
6055

61-
if (empty($filters)) {
62-
return $this->convertToLogData(
63-
$builder->orderByDesc($this->getColumn(LogTableColumnType::TIMESTAMP->value))->get()->all()
64-
);
65-
}
66-
6756
foreach ($filters as $key => $value) {
68-
// Apply specific filters based on known keys
6957
match ($key) {
7058
FilterKeyType::LEVEL->value => $builder->where($this->getColumn(LogTableColumnType::LEVEL->value), mb_strtolower((string) $value)),
7159
FilterKeyType::DATE_FROM->value => $builder->where($this->getColumn(LogTableColumnType::TIMESTAMP->value), '>=', $value),
@@ -75,10 +63,31 @@ public function filter(array $filters = []): array
7563
};
7664
}
7765

78-
// Return results ordered by creation date descending
79-
return $this->convertToLogData(
80-
$builder->orderByDesc($this->getColumn(LogTableColumnType::TIMESTAMP->value))->get()->all()
81-
);
66+
$builder->orderByDesc($this->getColumn(LogTableColumnType::TIMESTAMP->value));
67+
68+
return $this->executeQuery($builder, $chunk);
69+
}
70+
71+
/**
72+
* Executes the query builder and returns LogData array.
73+
* Optionally processes results in chunks for memory efficiency.
74+
*
75+
* @return array<LogData>
76+
*/
77+
protected function executeQuery(Builder $builder, bool $chunk = false): array
78+
{
79+
if ($chunk) {
80+
$chunkSize = (int) config('laravel-log-reader.db.chunk_size', 500);
81+
$results = [];
82+
83+
$builder->chunk($chunkSize, function ($chunk) use (&$results) {
84+
$results = array_merge($results, $chunk->all());
85+
});
86+
87+
return $this->convertToLogData($results);
88+
}
89+
90+
return $this->convertToLogData($builder->get()->all());
8291
}
8392

8493
protected function applyCustomFilter(Builder $builder, string $key, mixed $value): void

src/Readers/FileLogReader.php

Lines changed: 66 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -20,84 +20,96 @@
2020

2121
public function __construct(protected string $filePath) {}
2222

23+
/**
24+
* {@inheritDoc}
25+
*/
2326
public function search(string $query, bool $chunk = false): array
2427
{
25-
if (empty($query)) {
28+
if (empty($query) || ! file_exists($this->filePath)) {
2629
return [];
2730
}
2831

2932
$searchTerm = mb_strtolower($query);
3033

31-
if ($chunk) {
32-
$chunkSize = (int) config('laravel-log-reader.file.chunk_size', 512 * 1024); // default 512KB
33-
$handle = @fopen($this->filePath, 'r');
34-
35-
if (! $handle) {
36-
return [];
37-
}
34+
return $this->processFileChunks($chunk, function (array $logs) use ($searchTerm): array {
35+
return array_filter(
36+
$logs,
37+
fn(LogData $log) => str_contains(mb_strtolower($log->message ?? ''), $searchTerm) ||
38+
str_contains(mb_strtolower($log->context ?? ''), $searchTerm)
39+
);
40+
});
41+
}
3842

39-
$results = [];
40-
$buffer = '';
43+
/**
44+
* {@inheritDoc}
45+
*/
46+
public function filter(array $filters = [], bool $chunk = false): array
47+
{
48+
if (! file_exists($this->filePath)) {
49+
return [];
50+
}
4151

42-
while (! feof($handle)) {
43-
$buffer .= fread($handle, $chunkSize);
52+
if (empty($filters)) {
53+
return $this->parseLogFile();
54+
}
4455

45-
if (! feof($handle)) {
46-
$lastNewLinePos = strrpos($buffer, PHP_EOL);
56+
return $this->processFileChunks($chunk, function (array $logs) use ($filters): array {
57+
foreach ($filters as $key => $value) {
58+
$logs = array_filter($logs, fn(LogData $log) => $this->matchesFilter($log, $key, $value));
59+
}
4760

48-
if ($lastNewLinePos === false) {
49-
// no newline yet — read more before parsing
50-
continue;
51-
}
61+
return $logs;
62+
});
63+
}
5264

53-
$contentChunk = substr($buffer, 0, $lastNewLinePos);
54-
$buffer = substr($buffer, $lastNewLinePos + 1);
55-
} else {
56-
$contentChunk = $buffer;
57-
$buffer = '';
58-
}
65+
/**
66+
* @param callable(array<LogData>): array<LogData> $callback
67+
*
68+
* @return array<LogData>
69+
*/
70+
protected function processFileChunks(bool $chunk, callable $callback): array
71+
{
72+
if (! $chunk) {
73+
$logs = $this->parseLogFile();
5974

60-
// Parse and filter this chunk
61-
$logs = $this->convertToLogData($this->extractLogsFromContent($contentChunk));
75+
return array_values($callback($logs));
76+
}
6277

63-
$filtered = array_filter(
64-
$logs,
65-
fn(LogData $log) => str_contains(mb_strtolower($log->message ?? ''), $searchTerm)
66-
|| str_contains(mb_strtolower($log->context ?? ''), $searchTerm)
67-
);
78+
$chunkSize = (int) config('laravel-log-reader.file.chunk_size', 512 * 1024);
79+
$handle = @fopen($this->filePath, 'r');
6880

69-
$results = array_merge($results, array_values($filtered));
70-
}
81+
if (! $handle) {
82+
return [];
83+
}
7184

72-
fclose($handle);
85+
$results = [];
86+
$buffer = '';
7387

74-
return $results;
75-
}
88+
while (! feof($handle)) {
89+
$buffer .= fread($handle, $chunkSize);
7690

77-
// Non-chunked (default)
78-
$logs = $this->parseLogFile();
91+
if (! feof($handle)) {
92+
$lastNewLinePos = strrpos($buffer, PHP_EOL);
7993

80-
return array_values(array_filter(
81-
$logs,
82-
fn(LogData $log) => str_contains(mb_strtolower($log->message ?? ''), $searchTerm) ||
83-
str_contains(mb_strtolower($log->context ?? ''), $searchTerm)
84-
));
85-
}
94+
if ($lastNewLinePos === false) {
95+
continue;
96+
}
8697

87-
// todo add chunking/stream logic to filter method for both database filter method and here
88-
public function filter(array $filters = []): array
89-
{
90-
$logs = $this->parseLogFile();
98+
$contentChunk = substr($buffer, 0, $lastNewLinePos);
99+
$buffer = substr($buffer, $lastNewLinePos + 1);
100+
} else {
101+
$contentChunk = $buffer;
102+
$buffer = '';
103+
}
91104

92-
if (empty($filters)) {
93-
return $logs;
105+
$logs = $this->convertToLogData($this->extractLogsFromContent($contentChunk));
106+
$filtered = $callback($logs);
107+
$results = array_merge($results, array_values($filtered));
94108
}
95109

96-
foreach ($filters as $key => $value) {
97-
$logs = array_filter($logs, fn(LogData $log) => $this->matchesFilter($log, $key, $value));
98-
}
110+
fclose($handle);
99111

100-
return array_values($logs);
112+
return $results;
101113
}
102114

103115
/**

src/Readers/LogReaderInterface.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@ interface LogReaderInterface
1111
{
1212
/**
1313
* Search logs based on a query string.
14+
*
15+
* @return array<LogData>
1416
*/
1517
public function search(string $query, bool $chunk = false): array;
1618

1719
/**
1820
* Filter logs based on filter criteria.
21+
*
22+
* @return array<LogData>
1923
*/
20-
public function filter(array $filters = []): array;
24+
public function filter(array $filters = [], bool $chunk = false): array;
2125
}

tests/Readers/DatabaseLogReaderTest.php

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,6 @@ public function it_decodes_extra_with_user_and_request_metadata(): void
319319
/* ASSERT */
320320
$this->assertCount(1, $results);
321321
$log = $results[0];
322-
323322
$this->assertSame('User metadata logged', $log->message);
324323
$this->assertIsArray($log->extra);
325324
$this->assertSame($userId, $log->extra['user_id']);
@@ -433,6 +432,66 @@ public function it_searches_logs_with_chunk_size_greater_than_one(): void
433432
$this->assertSame($expected, $messages);
434433
}
435434

435+
#[Test]
436+
public function it_filters_logs_using_chunking(): void
437+
{
438+
/* SETUP */
439+
config(['laravel-log-reader.db.chunk_size' => 1]);
440+
441+
/* EXECUTE */
442+
$results = $this->databaseLogReader->filter([FilterKeyType::LEVEL->value => 'error'], true);
443+
444+
/* ASSERT */
445+
$this->assertCount(1, $results);
446+
$this->assertSame('Payment failed', $results[0]->message);
447+
}
448+
449+
#[Test]
450+
public function it_filters_returns_same_results_with_and_without_chunking(): void
451+
{
452+
/* SETUP */
453+
config(['laravel-log-reader.db.chunk_size' => 1]);
454+
455+
/* EXECUTE */
456+
$nonChunked = $this->databaseLogReader->filter([FilterKeyType::CHANNEL->value => 'payment'], false);
457+
$chunked = $this->databaseLogReader->filter([FilterKeyType::CHANNEL->value => 'payment'], true);
458+
459+
/* ASSERT */
460+
$this->assertCount(1, $nonChunked);
461+
$this->assertCount(1, $chunked);
462+
$this->assertSame($nonChunked[0]->message, $chunked[0]->message);
463+
}
464+
465+
#[Test]
466+
public function it_filters_logs_with_chunk_size_greater_than_one(): void
467+
{
468+
/* SETUP */
469+
config(['laravel-log-reader.db.chunk_size' => 2]);
470+
$count = 5;
471+
for ($i = 1; $i <= $count; $i++) {
472+
DB::table($this->table)->insert([
473+
'level' => 'warning',
474+
'message' => "Warning {$i}",
475+
'channel' => 'bulk',
476+
'context' => '{}',
477+
'extra' => '{}',
478+
'created_at' => now()->subSeconds($count - $i),
479+
]);
480+
}
481+
482+
/* EXECUTE */
483+
$results = $this->databaseLogReader->filter([FilterKeyType::LEVEL->value => 'warning'], true);
484+
485+
/* ASSERT */
486+
$this->assertCount($count, $results);
487+
$expected = [];
488+
for ($i = $count; $i >= 1; $i--) {
489+
$expected[] = "Warning {$i}";
490+
}
491+
$messages = array_map(fn($r) => $r->message, $results);
492+
$this->assertSame($expected, $messages);
493+
}
494+
436495
/**
437496
* Create the logs table for testing.
438497
*/

tests/Readers/FileLogReaderTest.php

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,6 @@ public function it_returns_same_results_with_and_without_chunking(): void
237237

238238
/* ASSERT */
239239
$this->assertCount(count($nonChunked), $chunked);
240-
241-
/* ASSERT */
242240
$nonChunkedMessages = array_map(fn($r) => $r->message, $nonChunked);
243241
$chunkedMessages = array_map(fn($r) => $r->message, $chunked);
244242
$this->assertSame($nonChunkedMessages, $chunkedMessages);
@@ -278,6 +276,40 @@ public function it_reads_all_logs_correctly_when_chunking_with_unique_data(): vo
278276
$this->assertSame($expectedMessages, $messages);
279277
}
280278

279+
#[Test]
280+
public function it_filters_logs_using_chunking(): void
281+
{
282+
/* SETUP */
283+
config(['laravel-log-reader.file.chunk_size' => 64]);
284+
285+
/* EXECUTE */
286+
$results = $this->fileReader->filter([
287+
FilterKeyType::LEVEL->value => 'error',
288+
], true);
289+
290+
/* ASSERT */
291+
$this->assertCount(1, $results);
292+
$this->assertSame('ERROR', $results[0]->level);
293+
$this->assertStringContainsString('undefined method', $results[0]->message);
294+
}
295+
296+
#[Test]
297+
public function it_filters_using_chunking_and_checks_same_results_with_and_without_chunking(): void
298+
{
299+
/* SETUP */
300+
config(['laravel-log-reader.file.chunk_size' => 64]);
301+
302+
/* EXECUTE */
303+
$nonChunked = $this->fileReader->filter([FilterKeyType::LEVEL->value => 'info'], false);
304+
$chunked = $this->fileReader->filter([FilterKeyType::LEVEL->value => 'info'], true);
305+
306+
/* ASSERT */
307+
$this->assertCount(count($nonChunked), $chunked);
308+
$nonChunkedMessages = array_map(fn($r) => $r->message, $nonChunked);
309+
$chunkedMessages = array_map(fn($r) => $r->message, $chunked);
310+
$this->assertSame($nonChunkedMessages, $chunkedMessages);
311+
}
312+
281313
/**
282314
* Creates a temporary log file with sample log entries for testing.
283315
*/

0 commit comments

Comments
 (0)