Skip to content

Commit 4690692

Browse files
author
Scott MacVicar
committed
Replace ereg code with pcre, fix duplicate macro names and segfault. Patch by Mikko
1 parent 42feedd commit 4690692

File tree

4 files changed

+223
-67
lines changed

4 files changed

+223
-67
lines changed

ext/fileinfo/libmagic/apprentice.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,7 +1118,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
11181118

11191119
m->mask_op = 0;
11201120
if (*l == '~') {
1121-
if (!IS_STRING(m->type))
1121+
if (!IS_LIBMAGIC_STRING(m->type))
11221122
m->mask_op |= FILE_OPINVERSE;
11231123
else if (ms->flags & MAGIC_CHECK)
11241124
file_magwarn(ms, "'~' invalid for string types");
@@ -1128,7 +1128,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
11281128
m->str_flags = 0;
11291129
m->num_mask = 0;
11301130
if ((op = get_op(*l)) != -1) {
1131-
if (!IS_STRING(m->type)) {
1131+
if (!IS_LIBMAGIC_STRING(m->type)) {
11321132
uint64_t val;
11331133
++l;
11341134
m->mask_op |= op;
@@ -2066,7 +2066,7 @@ bs1(struct magic *m)
20662066
m->offset = swap4((uint32_t)m->offset);
20672067
m->in_offset = swap4((uint32_t)m->in_offset);
20682068
m->lineno = swap4((uint32_t)m->lineno);
2069-
if (IS_STRING(m->type)) {
2069+
if (IS_LIBMAGIC_STRING(m->type)) {
20702070
m->str_range = swap4(m->str_range);
20712071
m->str_flags = swap4(m->str_flags);
20722072
}

ext/fileinfo/libmagic/file.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,11 @@
4646
#ifdef HAVE_INTTYPES_H
4747
#include <inttypes.h>
4848
#endif
49-
#include <regex.h>
49+
50+
#include "php.h"
51+
#include "ext/standard/php_string.h"
52+
#include "ext/pcre/php_pcre.h"
53+
5054
#include <sys/types.h>
5155
/* Do this here and now, because struct stat gets re-defined on solaris */
5256
#include <sys/stat.h>
@@ -165,7 +169,7 @@ struct magic {
165169
#defineFILE_LEDOUBLE38
166170
#defineFILE_NAMES_SIZE39/* size of array to contain all names */
167171

168-
#define IS_STRING(t) \
172+
#define IS_LIBMAGIC_STRING(t) \
169173
((t) == FILE_STRING || \
170174
(t) == FILE_PSTRING || \
171175
(t) == FILE_BESTRING16 || \

ext/fileinfo/libmagic/print.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ file_mdump(struct magic *m)
7474
if (m->mask_op & FILE_OPINVERSE)
7575
(void) fputc('~', stderr);
7676

77-
if (IS_STRING(m->type)) {
77+
if (IS_LIBMAGIC_STRING(m->type)) {
7878
if (m->str_flags) {
7979
(void) fputc('/', stderr);
8080
if (m->str_flags & STRING_COMPACT_BLANK)

ext/fileinfo/libmagic/softmagic.c

Lines changed: 213 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636
#include <stdlib.h>
3737
#include <time.h>
3838

39+
#ifndef PREG_OFFSET_CAPTURE
40+
# define PREG_OFFSET_CAPTURE (1<<8)
41+
#endif
42+
3943

4044
#ifndeflint
4145
FILE_RCSID("@(#)$File: softmagic.c,v 1.117 2008/03/01 22:21:49 rrt Exp $")
@@ -281,25 +285,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
281285
private int
282286
check_fmt(struct magic_set *ms, struct magic *m)
283287
{
284-
regex_t rx = {0};
285-
int rc;
286-
288+
pcre *pce;
289+
int re_options;
290+
pcre_extra *re_extra;
291+
287292
if (strchr(MAGIC_DESC, '%') == NULL) {
288293
return 0;
289294
}
290-
291-
rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
292-
if (rc) {
293-
char errmsg[512];
294-
(void)regerror(rc, &rx, errmsg, sizeof(errmsg));
295-
file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
295+
296+
if ((pce = pcre_get_compiled_regex("~%[-0-9.]*s~", &re_extra, &re_options TSRMLS_CC)) == NULL) {
296297
return -1;
297298
} else {
298-
regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1);
299-
rc = regexec(&rx, MAGIC_DESC, rx.re_nsub + 1, pmatch, 0);
300-
efree(pmatch);
301-
regfree(&rx);
302-
return !rc;
299+
return !pcre_exec(pce, re_extra, MAGIC_DESC, strlen(MAGIC_DESC), 0, re_options, NULL, 0);
303300
}
304301
}
305302

@@ -1488,6 +1485,66 @@ file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags)
14881485
return file_strncmp(a, b, len, flags);
14891486
}
14901487

1488+
private void
1489+
convert_libmagic_pattern(zval *pattern, int options)
1490+
{
1491+
int i, j=0;
1492+
char *t;
1493+
1494+
t = (char *) safe_emalloc(Z_STRLEN_P(pattern), 2, 5);
1495+
memset(t, '\0', sizeof(t));
1496+
1497+
t[j++] = '~';
1498+
1499+
for (i=0; i<Z_STRLEN_P(pattern); i++, j++) {
1500+
switch (Z_STRVAL_P(pattern)[i]) {
1501+
case '?':
1502+
t[j] = '.';
1503+
break;
1504+
case '*':
1505+
t[j++] = '.';
1506+
t[j] = '*';
1507+
break;
1508+
case '.':
1509+
t[j++] = '\\';
1510+
t[j] = '.';
1511+
break;
1512+
case '\\':
1513+
t[j++] = '\\';
1514+
t[j] = '\\';
1515+
break;
1516+
case '(':
1517+
t[j++] = '\\';
1518+
t[j] = '(';
1519+
break;
1520+
case ')':
1521+
t[j++] = '\\';
1522+
t[j] = ')';
1523+
break;
1524+
case '~':
1525+
t[j++] = '\\';
1526+
t[j] = '~';
1527+
break;
1528+
default:
1529+
t[j] = Z_STRVAL_P(pattern)[i];
1530+
break;
1531+
}
1532+
}
1533+
t[j++] = '~';
1534+
1535+
if (options & PCRE_CASELESS)
1536+
t[j++] = 'm';
1537+
1538+
if (options & PCRE_MULTILINE)
1539+
t[j++] = 'i';
1540+
1541+
t[j]=0;
1542+
1543+
Z_STRVAL_P(pattern) = t;
1544+
Z_STRLEN_P(pattern) = j;
1545+
1546+
}
1547+
14911548
private int
14921549
magiccheck(struct magic_set *ms, struct magic *m)
14931550
{
@@ -1642,61 +1699,156 @@ magiccheck(struct magic_set *ms, struct magic *m)
16421699
}
16431700
break;
16441701
}
1702+
16451703
case FILE_REGEX: {
1646-
int rc;
1647-
regex_t rx = {0};
1648-
char errmsg[512];
1649-
1650-
if (ms->search.s == NULL)
1651-
return 0;
1704+
zval *pattern;
1705+
int options = 0;
1706+
pcre_cache_entry *pce;
1707+
1708+
MAKE_STD_ZVAL(pattern);
1709+
Z_STRVAL_P(pattern) = (char *)m->value.s;
1710+
Z_STRLEN_P(pattern) = m->vallen;
1711+
Z_TYPE_P(pattern) = IS_STRING;
1712+
1713+
options |= PCRE_MULTILINE;
1714+
1715+
if (m->str_flags & STRING_IGNORE_CASE) {
1716+
options |= PCRE_CASELESS;
1717+
}
1718+
1719+
convert_libmagic_pattern(pattern, options);
16521720

1653-
l = 0;
1654-
rc = regcomp(&rx, m->value.s, REG_EXTENDED|REG_NEWLINE|((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
1655-
if (rc) {
1656-
(void)regerror(rc, &rx, errmsg, sizeof(errmsg));
1657-
file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
1658-
v = (uint64_t)-1;
1721+
if ((pce = pcre_get_compiled_regex_cache(Z_STRVAL_P(pattern), Z_STRLEN_P(pattern) TSRMLS_CC)) == NULL) {
1722+
return -1;
16591723
} else {
1660-
regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1);
1661-
#ifndef REG_STARTEND
1662-
#defineREG_STARTEND0
1663-
size_t l = ms->search.s_len - 1;
1664-
char c = ms->search.s[l];
1665-
((char *)(intptr_t)ms->search.s)[l] = '\0';
1666-
#else
1667-
pmatch[0].rm_so = 0;
1668-
pmatch[0].rm_eo = ms->search.s_len;
1669-
#endif
1670-
rc = regexec(&rx, (const char *)ms->search.s, 1, pmatch, REG_STARTEND);
1671-
#if REG_STARTEND == 0
1672-
((char *)(intptr_t)ms->search.s)[l] = c;
1673-
#endif
1674-
switch (rc) {
1675-
case 0:
1676-
ms->search.s += (int)pmatch[0].rm_so;
1677-
ms->search.offset += (size_t)pmatch[0].rm_so;
1678-
ms->search.rm_len = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
1679-
v = 0;
1680-
break;
1724+
/* pce now contains the compiled regex */
1725+
zval *retval;
1726+
zval *subpats;
1727+
char *haystack;
1728+
1729+
MAKE_STD_ZVAL(retval);
1730+
ALLOC_INIT_ZVAL(subpats);
1731+
1732+
/* Cut the search len from haystack, equals to REG_STARTEND */
1733+
haystack = estrndup(ms->search.s, ms->search.s_len);
16811734

1682-
case REG_NOMATCH:
1683-
v = 1;
1684-
break;
1735+
/* match v = 0, no match v = 1 */
1736+
php_pcre_match_impl(pce, haystack, ms->search.s_len, retval, subpats, 1, 1, PREG_OFFSET_CAPTURE, 0 TSRMLS_CC);
1737+
1738+
/* Free haystack */
1739+
efree(haystack);
1740+
1741+
if (Z_LVAL_P(retval) < 0) {
1742+
zval_ptr_dtor(&subpats);
1743+
FREE_ZVAL(retval);
1744+
efree(Z_STRVAL_P(pattern));
1745+
efree(pattern);
1746+
return -1;
1747+
} else if ((Z_LVAL_P(retval) > 0) && (Z_TYPE_P(subpats) == IS_ARRAY)) {
1748+
1749+
/* Need to fetch global match which equals pmatch[0] */
1750+
HashTable *ht = Z_ARRVAL_P(subpats);
1751+
HashPosition outer_pos;
1752+
zval *pattern_match = NULL, *pattern_offset = NULL;
1753+
1754+
zend_hash_internal_pointer_reset_ex(ht, &outer_pos);
1755+
1756+
if (zend_hash_has_more_elements_ex(ht, &outer_pos) == SUCCESS &&
1757+
zend_hash_move_forward_ex(ht, &outer_pos)) {
1758+
1759+
zval **ppzval;
1760+
1761+
/* The first element (should be) is the global match
1762+
Need to move to the inner array to get the global match */
1763+
1764+
if (zend_hash_get_current_data_ex(ht, (void**)&ppzval, &outer_pos) != FAILURE) {
1765+
1766+
HashTable *inner_ht;
1767+
HashPosition inner_pos;
1768+
zval **match, **offset;
1769+
zval tmpcopy = **ppzval, matchcopy, offsetcopy;
1770+
1771+
zval_copy_ctor(&tmpcopy);
1772+
INIT_PZVAL(&tmpcopy);
1773+
1774+
inner_ht = Z_ARRVAL(tmpcopy);
1775+
1776+
/* If everything goes according to the master plan
1777+
tmpcopy now contains two elements:
1778+
0 = the match
1779+
1 = starting position of the match */
1780+
zend_hash_internal_pointer_reset_ex(inner_ht, &inner_pos);
1781+
1782+
if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
1783+
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
1784+
1785+
if (zend_hash_get_current_data_ex(inner_ht, (void**)&match, &inner_pos) != FAILURE) {
1786+
1787+
matchcopy = **match;
1788+
zval_copy_ctor(&matchcopy);
1789+
INIT_PZVAL(&matchcopy);
1790+
convert_to_string(&matchcopy);
1791+
1792+
MAKE_STD_ZVAL(pattern_match);
1793+
Z_STRVAL_P(pattern_match) = (char *)Z_STRVAL(matchcopy);
1794+
Z_STRLEN_P(pattern_match) = Z_STRLEN(matchcopy);
1795+
Z_TYPE_P(pattern_match) = IS_STRING;
1796+
1797+
zval_dtor(&matchcopy);
1798+
}
1799+
}
1800+
1801+
if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
1802+
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
1803+
1804+
if (zend_hash_get_current_data_ex(inner_ht, (void**)&offset, &inner_pos) != FAILURE) {
1805+
1806+
offsetcopy = **offset;
1807+
zval_copy_ctor(&offsetcopy);
1808+
INIT_PZVAL(&offsetcopy);
1809+
convert_to_long(&offsetcopy);
1810+
1811+
MAKE_STD_ZVAL(pattern_offset);
1812+
Z_LVAL_P(pattern_offset) = Z_LVAL(offsetcopy);
1813+
Z_TYPE_P(pattern_offset) = IS_LONG;
1814+
1815+
zval_dtor(&offsetcopy);
1816+
}
1817+
}
1818+
zval_dtor(&tmpcopy);
1819+
}
1820+
1821+
if ((pattern_match != NULL) && (pattern_offset != NULL)) {
1822+
ms->search.s += (int)Z_LVAL_P(pattern_offset); /* this is where the match starts */
1823+
ms->search.offset += (size_t)Z_LVAL_P(pattern_offset); /* this is where the match starts as size_t */
1824+
ms->search.rm_len = Z_STRLEN_P(pattern_match) /* This is the length of the matched pattern */;
1825+
v = 0;
1826+
1827+
efree(pattern_match);
1828+
efree(pattern_offset);
1829+
1830+
} else {
1831+
zval_ptr_dtor(&subpats);
1832+
FREE_ZVAL(retval);
1833+
efree(Z_STRVAL_P(pattern));
1834+
efree(pattern);
1835+
return -1;
1836+
}
1837+
}
16851838

1686-
default:
1687-
(void)regerror(rc, &rx, errmsg, sizeof(errmsg));
1688-
file_magerror(ms, "regexec error %d, (%s)", rc, errmsg);
1689-
v = (uint64_t)-1;
1690-
break;
1839+
1840+
} else {
1841+
v = 1;
16911842
}
1692-
regfree(&rx);
1693-
efree(pmatch);
1843+
zval_ptr_dtor(&subpats);
1844+
FREE_ZVAL(retval);
16941845
}
1695-
if (v == (uint64_t)-1) {
1696-
return -1;
1697-
}
1698-
break;
1846+
efree(Z_STRVAL_P(pattern));
1847+
efree(pattern);
1848+
break;
16991849
}
1850+
1851+
17001852
default:
17011853
file_magerror(ms, "invalid type %d in magiccheck()", m->type);
17021854
return -1;

0 commit comments

Comments
 (0)