|
36 | 36 | #include <stdlib.h> |
37 | 37 | #include <time.h> |
38 | 38 |
|
| 39 | +#ifndef PREG_OFFSET_CAPTURE |
| 40 | +# define PREG_OFFSET_CAPTURE (1<<8) |
| 41 | +#endif |
| 42 | + |
39 | 43 |
|
40 | 44 | #ifndeflint |
41 | 45 | FILE_RCSID("@(#)$File: softmagic.c,v 1.117 2008/03/01 22:21:49 rrt Exp $") |
@@ -281,25 +285,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, |
/*
 * check_fmt: report whether MAGIC_DESC contains a "%...s" style conversion.
 *
 * This span is a diff hunk: lines marked '-' are the removed POSIX
 * regcomp()/regexec() implementation, lines marked '+' are the PCRE based
 * replacement.
 *
 * Result of the '+' version:
 *   0   MAGIC_DESC contains no '%' at all (regex is not consulted)
 *  -1   pcre_get_compiled_regex() returned NULL for the pattern
 *  else the logical negation of the pcre_exec() return value
 */
281 | 285 | private int |
282 | 286 | check_fmt(struct magic_set *ms, struct magic *m) |
283 | 287 | { |
284 | | -regex_t rx = {0}; |
285 | | -int rc; |
286 | | - |
| 288 | +pcre *pce; |
| 289 | +int re_options; |
| 290 | +pcre_extra *re_extra; |
| 291 | + |
/* Cheap pre-filter: without a '%' there can be no conversion to find. */
287 | 292 | if (strchr(MAGIC_DESC, '%') == NULL) { |
288 | 293 | return 0; |
289 | 294 | } |
290 | | - |
291 | | -rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); |
292 | | -if (rc) { |
293 | | -char errmsg[512]; |
294 | | -(void)regerror(rc, &rx, errmsg, sizeof(errmsg)); |
295 | | -file_magerror(ms, "regex error %d, (%s)", rc, errmsg); |
| 295 | + |
/* Same expression as the removed regcomp() call, wrapped in '~' delimiters. */
| 296 | +if ((pce = pcre_get_compiled_regex("~%[-0-9.]*s~", &re_extra, &re_options TSRMLS_CC)) == NULL) { |
296 | 297 | return -1; |
297 | 298 | } else { |
298 | | -regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1); |
299 | | -rc = regexec(&rx, MAGIC_DESC, rx.re_nsub + 1, pmatch, 0); |
300 | | -efree(pmatch); |
301 | | -regfree(&rx); |
302 | | -return !rc; |
/* No output vector is passed (NULL, 0), so only the return code is used.
   NOTE(review): this yields 1 only when pcre_exec() returns exactly 0;
   presumably that is its "matched, vector too small" result - confirm
   against the PCRE API documentation. */
| 299 | + return !pcre_exec(pce, re_extra, MAGIC_DESC, strlen(MAGIC_DESC), 0, re_options, NULL, 0); |
303 | 300 | } |
304 | 301 | } |
305 | 302 |
|
@@ -1488,6 +1485,66 @@ file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags) |
1488 | 1485 | return file_strncmp(a, b, len, flags); |
1489 | 1486 | } |
1490 | 1487 |
|
| 1488 | +private void |
| 1489 | +convert_libmagic_pattern(zval *pattern, int options) |
| 1490 | +{ |
| 1491 | +int i, j=0; |
| 1492 | +char *t; |
| 1493 | + |
| 1494 | +t = (char *) safe_emalloc(Z_STRLEN_P(pattern), 2, 5); |
| 1495 | +memset(t, '\0', sizeof(t)); |
| 1496 | + |
| 1497 | +t[j++] = '~'; |
| 1498 | + |
| 1499 | +for (i=0; i<Z_STRLEN_P(pattern); i++, j++) { |
| 1500 | +switch (Z_STRVAL_P(pattern)[i]) { |
| 1501 | +case '?': |
| 1502 | +t[j] = '.'; |
| 1503 | +break; |
| 1504 | +case '*': |
| 1505 | +t[j++] = '.'; |
| 1506 | +t[j] = '*'; |
| 1507 | +break; |
| 1508 | +case '.': |
| 1509 | +t[j++] = '\\'; |
| 1510 | +t[j] = '.'; |
| 1511 | +break; |
| 1512 | +case '\\': |
| 1513 | +t[j++] = '\\'; |
| 1514 | +t[j] = '\\'; |
| 1515 | +break; |
| 1516 | +case '(': |
| 1517 | +t[j++] = '\\'; |
| 1518 | +t[j] = '('; |
| 1519 | +break; |
| 1520 | +case ')': |
| 1521 | +t[j++] = '\\'; |
| 1522 | +t[j] = ')'; |
| 1523 | +break; |
| 1524 | +case '~': |
| 1525 | +t[j++] = '\\'; |
| 1526 | +t[j] = '~'; |
| 1527 | +break; |
| 1528 | +default: |
| 1529 | +t[j] = Z_STRVAL_P(pattern)[i]; |
| 1530 | +break; |
| 1531 | +} |
| 1532 | +} |
| 1533 | +t[j++] = '~'; |
| 1534 | + |
| 1535 | +if (options & PCRE_CASELESS) |
| 1536 | +t[j++] = 'm'; |
| 1537 | + |
| 1538 | +if (options & PCRE_MULTILINE) |
| 1539 | +t[j++] = 'i'; |
| 1540 | + |
| 1541 | +t[j]=0; |
| 1542 | + |
| 1543 | +Z_STRVAL_P(pattern) = t; |
| 1544 | +Z_STRLEN_P(pattern) = j; |
| 1545 | + |
| 1546 | +} |
| 1547 | + |
1491 | 1548 | private int |
1492 | 1549 | magiccheck(struct magic_set *ms, struct magic *m) |
1493 | 1550 | { |
@@ -1642,61 +1699,156 @@ magiccheck(struct magic_set *ms, struct magic *m) |
1642 | 1699 | } |
1643 | 1700 | break; |
1644 | 1701 | } |
| 1702 | + |
1645 | 1703 | case FILE_REGEX: { |
1646 | | -int rc; |
1647 | | -regex_t rx = {0}; |
1648 | | -char errmsg[512]; |
1649 | | - |
1650 | | -if (ms->search.s == NULL) |
1651 | | -return 0; |
| 1704 | +zval *pattern; |
| 1705 | +int options = 0; |
| 1706 | +pcre_cache_entry *pce; |
| 1707 | + |
| 1708 | +MAKE_STD_ZVAL(pattern); |
| 1709 | +Z_STRVAL_P(pattern) = (char *)m->value.s; |
| 1710 | +Z_STRLEN_P(pattern) = m->vallen; |
| 1711 | +Z_TYPE_P(pattern) = IS_STRING; |
| 1712 | + |
| 1713 | +options |= PCRE_MULTILINE; |
| 1714 | + |
| 1715 | +if (m->str_flags & STRING_IGNORE_CASE) { |
| 1716 | +options |= PCRE_CASELESS; |
| 1717 | +} |
| 1718 | + |
| 1719 | +convert_libmagic_pattern(pattern, options); |
1652 | 1720 |
|
1653 | | -l = 0; |
1654 | | -rc = regcomp(&rx, m->value.s, REG_EXTENDED|REG_NEWLINE|((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); |
1655 | | -if (rc) { |
1656 | | -(void)regerror(rc, &rx, errmsg, sizeof(errmsg)); |
1657 | | -file_magerror(ms, "regex error %d, (%s)", rc, errmsg); |
1658 | | -v = (uint64_t)-1; |
| 1721 | +if ((pce = pcre_get_compiled_regex_cache(Z_STRVAL_P(pattern), Z_STRLEN_P(pattern) TSRMLS_CC)) == NULL) { |
| 1722 | +return -1; |
1659 | 1723 | } else { |
1660 | | -regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1); |
1661 | | -#ifndef REG_STARTEND |
1662 | | -#defineREG_STARTEND0 |
1663 | | -size_t l = ms->search.s_len - 1; |
1664 | | -char c = ms->search.s[l]; |
1665 | | -((char *)(intptr_t)ms->search.s)[l] = '\0'; |
1666 | | -#else |
1667 | | -pmatch[0].rm_so = 0; |
1668 | | -pmatch[0].rm_eo = ms->search.s_len; |
1669 | | -#endif |
1670 | | -rc = regexec(&rx, (const char *)ms->search.s, 1, pmatch, REG_STARTEND); |
1671 | | -#if REG_STARTEND == 0 |
1672 | | -((char *)(intptr_t)ms->search.s)[l] = c; |
1673 | | -#endif |
1674 | | -switch (rc) { |
1675 | | -case 0: |
1676 | | -ms->search.s += (int)pmatch[0].rm_so; |
1677 | | -ms->search.offset += (size_t)pmatch[0].rm_so; |
1678 | | -ms->search.rm_len = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so); |
1679 | | -v = 0; |
1680 | | -break; |
| 1724 | +/* pce now contains the compiled regex */ |
| 1725 | +zval *retval; |
| 1726 | +zval *subpats; |
| 1727 | +char *haystack; |
| 1728 | + |
| 1729 | +MAKE_STD_ZVAL(retval); |
| 1730 | +ALLOC_INIT_ZVAL(subpats); |
| 1731 | + |
| 1732 | +/* Cut the search len from haystack, equals to REG_STARTEND */ |
| 1733 | +haystack = estrndup(ms->search.s, ms->search.s_len); |
1681 | 1734 |
|
1682 | | -case REG_NOMATCH: |
1683 | | -v = 1; |
1684 | | -break; |
| 1735 | +/* match v = 0, no match v = 1 */ |
| 1736 | +php_pcre_match_impl(pce, haystack, ms->search.s_len, retval, subpats, 1, 1, PREG_OFFSET_CAPTURE, 0 TSRMLS_CC); |
| 1737 | + |
| 1738 | +/* Free haystack */ |
| 1739 | +efree(haystack); |
| 1740 | + |
| 1741 | +if (Z_LVAL_P(retval) < 0) { |
| 1742 | +zval_ptr_dtor(&subpats); |
| 1743 | +FREE_ZVAL(retval); |
| 1744 | +efree(Z_STRVAL_P(pattern)); |
| 1745 | +efree(pattern); |
| 1746 | +return -1; |
| 1747 | +} else if ((Z_LVAL_P(retval) > 0) && (Z_TYPE_P(subpats) == IS_ARRAY)) { |
| 1748 | + |
| 1749 | +/* Need to fetch global match which equals pmatch[0] */ |
| 1750 | +HashTable *ht = Z_ARRVAL_P(subpats); |
| 1751 | +HashPosition outer_pos; |
| 1752 | +zval *pattern_match = NULL, *pattern_offset = NULL; |
| 1753 | + |
| 1754 | +zend_hash_internal_pointer_reset_ex(ht, &outer_pos); |
| 1755 | + |
| 1756 | +if (zend_hash_has_more_elements_ex(ht, &outer_pos) == SUCCESS && |
| 1757 | +zend_hash_move_forward_ex(ht, &outer_pos)) { |
| 1758 | + |
| 1759 | +zval **ppzval; |
| 1760 | + |
| 1761 | +/* The first element (should be) is the global match |
| 1762 | + Need to move to the inner array to get the global match */ |
| 1763 | + |
| 1764 | +if (zend_hash_get_current_data_ex(ht, (void**)&ppzval, &outer_pos) != FAILURE) { |
| 1765 | + |
| 1766 | +HashTable *inner_ht; |
| 1767 | +HashPosition inner_pos; |
| 1768 | +zval **match, **offset; |
| 1769 | +zval tmpcopy = **ppzval, matchcopy, offsetcopy; |
| 1770 | + |
| 1771 | +zval_copy_ctor(&tmpcopy); |
| 1772 | +INIT_PZVAL(&tmpcopy); |
| 1773 | + |
| 1774 | +inner_ht = Z_ARRVAL(tmpcopy); |
| 1775 | + |
| 1776 | +/* If everything goes according to the master plan |
| 1777 | + tmpcopy now contains two elements: |
| 1778 | + 0 = the match |
| 1779 | + 1 = starting position of the match */ |
| 1780 | +zend_hash_internal_pointer_reset_ex(inner_ht, &inner_pos); |
| 1781 | + |
| 1782 | +if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS && |
| 1783 | +zend_hash_move_forward_ex(inner_ht, &inner_pos)) { |
| 1784 | + |
| 1785 | +if (zend_hash_get_current_data_ex(inner_ht, (void**)&match, &inner_pos) != FAILURE) { |
| 1786 | + |
| 1787 | +matchcopy = **match; |
| 1788 | +zval_copy_ctor(&matchcopy); |
| 1789 | +INIT_PZVAL(&matchcopy); |
| 1790 | +convert_to_string(&matchcopy); |
| 1791 | + |
| 1792 | +MAKE_STD_ZVAL(pattern_match); |
| 1793 | +Z_STRVAL_P(pattern_match) = (char *)Z_STRVAL(matchcopy); |
| 1794 | +Z_STRLEN_P(pattern_match) = Z_STRLEN(matchcopy); |
| 1795 | +Z_TYPE_P(pattern_match) = IS_STRING; |
| 1796 | + |
| 1797 | +zval_dtor(&matchcopy); |
| 1798 | +} |
| 1799 | +} |
| 1800 | + |
| 1801 | +if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS && |
| 1802 | +zend_hash_move_forward_ex(inner_ht, &inner_pos)) { |
| 1803 | + |
| 1804 | +if (zend_hash_get_current_data_ex(inner_ht, (void**)&offset, &inner_pos) != FAILURE) { |
| 1805 | + |
| 1806 | +offsetcopy = **offset; |
| 1807 | +zval_copy_ctor(&offsetcopy); |
| 1808 | +INIT_PZVAL(&offsetcopy); |
| 1809 | +convert_to_long(&offsetcopy); |
| 1810 | + |
| 1811 | +MAKE_STD_ZVAL(pattern_offset); |
| 1812 | +Z_LVAL_P(pattern_offset) = Z_LVAL(offsetcopy); |
| 1813 | +Z_TYPE_P(pattern_offset) = IS_LONG; |
| 1814 | + |
| 1815 | +zval_dtor(&offsetcopy); |
| 1816 | +} |
| 1817 | +} |
| 1818 | +zval_dtor(&tmpcopy); |
| 1819 | +} |
| 1820 | + |
| 1821 | +if ((pattern_match != NULL) && (pattern_offset != NULL)) { |
| 1822 | +ms->search.s += (int)Z_LVAL_P(pattern_offset); /* this is where the match starts */ |
| 1823 | +ms->search.offset += (size_t)Z_LVAL_P(pattern_offset); /* this is where the match starts as size_t */ |
| 1824 | +ms->search.rm_len = Z_STRLEN_P(pattern_match) /* This is the length of the matched pattern */; |
| 1825 | +v = 0; |
| 1826 | + |
| 1827 | +efree(pattern_match); |
| 1828 | +efree(pattern_offset); |
| 1829 | + |
| 1830 | +} else { |
| 1831 | +zval_ptr_dtor(&subpats); |
| 1832 | +FREE_ZVAL(retval); |
| 1833 | +efree(Z_STRVAL_P(pattern)); |
| 1834 | +efree(pattern); |
| 1835 | +return -1; |
| 1836 | +} |
| 1837 | +} |
1685 | 1838 |
|
1686 | | -default: |
1687 | | -(void)regerror(rc, &rx, errmsg, sizeof(errmsg)); |
1688 | | -file_magerror(ms, "regexec error %d, (%s)", rc, errmsg); |
1689 | | -v = (uint64_t)-1; |
1690 | | -break; |
| 1839 | + |
| 1840 | +} else { |
| 1841 | +v = 1; |
1691 | 1842 | } |
1692 | | -regfree(&rx); |
1693 | | -efree(pmatch); |
| 1843 | +zval_ptr_dtor(&subpats); |
| 1844 | +FREE_ZVAL(retval); |
1694 | 1845 | } |
1695 | | -if (v == (uint64_t)-1) { |
1696 | | -return -1; |
1697 | | -} |
1698 | | -break; |
| 1846 | +efree(Z_STRVAL_P(pattern)); |
| 1847 | +efree(pattern); |
| 1848 | +break; |
1699 | 1849 | } |
| 1850 | + |
| 1851 | + |
1700 | 1852 | default: |
1701 | 1853 | file_magerror(ms, "invalid type %d in magiccheck()", m->type); |
1702 | 1854 | return -1; |
|
0 commit comments