| |
@@ -22,6 +22,10 @@
|
| |
#include <errno.h>
|
| |
#include <fcntl.h>
|
| |
#include <grp.h>
|
| |
+ #if HAVE_ICONV
|
| |
+ #include <iconv.h>
|
| |
+ #include <langinfo.h>
|
| |
+ #endif
|
| |
#include <inttypes.h>
|
| |
#include <limits.h>
|
| |
#include <locale.h>
|
| |
@@ -47,6 +51,10 @@
|
| |
#include "db.h"
|
| |
#include "lib.h"
|
| |
|
| |
+ #define FNMATCH_CHARS "*?[\\]"
|
| |
+ #define BASIC_REGEX_META_CHARS ".^$*[]\\-"
|
| |
+ #define EXTENDED_REGEX_META_CHARS BASIC_REGEX_META_CHARS "{}|+?()"
|
| |
+
|
| |
/* Check file existence before reporting them */
|
| |
static bool conf_check_existence; /* = false; */
|
| |
|
| |
@@ -60,6 +68,12 @@
|
| |
/* Ignore case when matching patterns */
|
| |
static bool conf_ignore_case; /* = false; */
|
| |
|
| |
+ /* Ignore accents when matching patterns */
|
| |
+ static bool conf_transliterate; /* = false; */
|
| |
+
|
| |
+ /* Ignore punctuation and spaces when matching patterns */
|
| |
+ static bool conf_ingore_separators; /* = false; */
|
| |
+
|
| |
/* Return only files that match all patterns */
|
| |
static bool conf_match_all_patterns; /* = false; */
|
| |
|
| |
@@ -108,6 +122,11 @@
|
| |
/* Output only statistics */
|
| |
static bool conf_statistics; /* = false; */
|
| |
|
| |
+ #if HAVE_ICONV
|
| |
+ /* Iconv context for transliterate conversion */
|
| |
+ static iconv_t iconv_context; /* = NULL; */
|
| |
+ #endif
|
| |
+
|
| |
/* String utilities */
|
| |
|
| |
/* Convert SRC to upper-case wide string in OBSTACK;
|
| |
@@ -163,6 +182,242 @@
|
| |
return res;
|
| |
}
|
| |
|
| |
+ #if HAVE_ICONV
|
| |
+ static bool
|
| |
+ char_needs_escape (const char c)
|
| |
+ {
|
| |
+ if (conf_match_regexp_basic != false &&
|
| |
+ strchr (BASIC_REGEX_META_CHARS, c) != NULL)
|
| |
+ return true;
|
| |
+
|
| |
+ if (conf_match_regexp_basic != true &&
|
| |
+ strchr (EXTENDED_REGEX_META_CHARS, c) != NULL)
|
| |
+ return true;
|
| |
+
|
| |
+ return false;
|
| |
+ }
|
| |
+
|
| |
+ static char *
|
| |
+ escape_regex (const char *str, size_t len, size_t *escaped_len)
|
| |
+ {
|
| |
+ size_t i, j;
|
| |
+ size_t newlen;
|
| |
+ bool foundmeta;
|
| |
+ char *outbuf;
|
| |
+
|
| |
+ if (escaped_len)
|
| |
+ *escaped_len = 0;
|
| |
+
|
| |
+ if (conf_match_regexp != true)
|
| |
+ return NULL;
|
| |
+
|
| |
+ foundmeta = false;
|
| |
+ newlen = 0;
|
| |
+
|
| |
+ for (i = 0; str[i] && i < len; ++i)
|
| |
+ {
|
| |
+ if (char_needs_escape (str[i]))
|
| |
+ {
|
| |
+ foundmeta = true;
|
| |
+ ++newlen;
|
| |
+ }
|
| |
+ ++newlen;
|
| |
+ }
|
| |
+
|
| |
+ if (foundmeta != true || newlen == 0)
|
| |
+ return NULL;
|
| |
+
|
| |
+ outbuf = xmalloc (newlen + 1);
|
| |
+ outbuf[newlen] = '\0';
|
| |
+
|
| |
+ for (i = 0, j = 0; i < len && j < newlen; ++i)
|
| |
+ {
|
| |
+ if (char_needs_escape (str[i]))
|
| |
+ outbuf[j++] = '\\';
|
| |
+ outbuf[j++] = str[i];
|
| |
+ }
|
| |
+
|
| |
+ if (escaped_len)
|
| |
+ *escaped_len = newlen;
|
| |
+
|
| |
+ return outbuf;
|
| |
+ }
|
| |
+
|
| |
+ /* Use iconv to transliterate the string into ASCII chars, when possible.
|
| |
+ If a transliteration does not exist, we just use the actual symbol
|
| |
+ not to loose precision. */
|
| |
+ static char *
|
| |
+ transliterate_string (const char *str)
|
| |
+ {
|
| |
+ size_t strrlen;
|
| |
+ size_t inlen;
|
| |
+ size_t outleft;
|
| |
+ size_t transliteratedlen;
|
| |
+ size_t nonasciibytes;
|
| |
+ size_t i;
|
| |
+ bool changed;
|
| |
+ char *inbuf;
|
| |
+ char *outbuf;
|
| |
+ char *outptr;
|
| |
+
|
| |
+ changed = false;
|
| |
+ nonasciibytes = 0;
|
| |
+ strrlen = 0;
|
| |
+
|
| |
+ for (i = 0; str[i]; i++)
|
| |
+ {
|
| |
+ if (str[i] & 0x80)
|
| |
+ ++nonasciibytes;
|
| |
+
|
| |
+ ++strrlen;
|
| |
+ }
|
| |
+
|
| |
+ if (nonasciibytes < 1)
|
| |
+ return NULL;
|
| |
+
|
| |
+ inbuf = (char *) str;
|
| |
+ inlen = 1;
|
| |
+ transliteratedlen = 0;
|
| |
+ outleft = strrlen + nonasciibytes;
|
| |
+ outbuf = xmalloc (outleft);
|
| |
+ outptr = outbuf;
|
| |
+
|
| |
+ while (inbuf + inlen <= str + strrlen)
|
| |
+ {
|
| |
+ size_t convertedlen;
|
| |
+ size_t conversions;
|
| |
+ size_t symbollen;
|
| |
+ size_t outidx;
|
| |
+
|
| |
+ symbollen = inlen;
|
| |
+ conversions = iconv (iconv_context, &inbuf, &inlen, &outptr, &outleft);
|
| |
+ outidx = outptr - outbuf;
|
| |
+ convertedlen = outidx - transliteratedlen;
|
| |
+
|
| |
+ if (conversions == (size_t) -1)
|
| |
+ {
|
| |
+ if (errno == EILSEQ || errno == EINVAL)
|
| |
+ {
|
| |
+ inlen += 1;
|
| |
+ continue;
|
| |
+ }
|
| |
+ else if (errno == E2BIG)
|
| |
+ {
|
| |
+ outleft += 5;
|
| |
+ outbuf = xrealloc (outbuf, outidx + outleft);
|
| |
+ outptr = outbuf + outidx;
|
| |
+ continue;
|
| |
+ }
|
| |
+ error (0, errno, _("Impossible to transliterate string %s"), str);
|
| |
+ changed = false;
|
| |
+ break;
|
| |
+ }
|
| |
+ else if (conversions == 1 && convertedlen == 1 && outptr[-1] == '?')
|
| |
+ {
|
| |
+ /* Transliteration is not possible for this symbol, so we just
|
| |
+ reuse it as it is. */
|
| |
+ memcpy (outptr - 1, inbuf - symbollen, symbollen);
|
| |
+ convertedlen = symbollen;
|
| |
+ outptr += symbollen - 1;
|
| |
+ outleft -= symbollen - 1;
|
| |
+ }
|
| |
+ else if (conversions > 0)
|
| |
+ {
|
| |
+ if (conf_match_regexp != false && convertedlen > 0)
|
| |
+ {
|
| |
+ char *converted;
|
| |
+ char *escaped;
|
| |
+ size_t escaped_len;
|
| |
+
|
| |
+ converted = outptr - convertedlen;
|
| |
+ escaped = escape_regex (converted, convertedlen, &escaped_len);
|
| |
+
|
| |
+ if (escaped)
|
| |
+ {
|
| |
+ if (escaped_len > outleft)
|
| |
+ {
|
| |
+ outleft += (escaped_len - outleft);
|
| |
+ outbuf = xrealloc (outbuf, outidx + outleft);
|
| |
+ outptr = outbuf + outidx;
|
| |
+ converted = outptr - convertedlen;
|
| |
+ }
|
| |
+ memcpy (converted, escaped, escaped_len);
|
| |
+ free (escaped);
|
| |
+
|
| |
+ outptr += (escaped_len - convertedlen);
|
| |
+ outleft -= (escaped_len - convertedlen);
|
| |
+ convertedlen = escaped_len;
|
| |
+ }
|
| |
+ }
|
| |
+ changed = true;
|
| |
+ }
|
| |
+ transliteratedlen += convertedlen;
|
| |
+ inlen = 1;
|
| |
+ }
|
| |
+
|
| |
+ if (changed != true)
|
| |
+ {
|
| |
+ free (outbuf);
|
| |
+ return NULL;
|
| |
+ }
|
| |
+
|
| |
+ outbuf[transliteratedlen] = '\0';
|
| |
+ return outbuf;
|
| |
+ }
|
| |
+ #endif
|
| |
+
|
| |
+ /* Remove repeated punct or spaces from string and replaces
|
| |
+ them using a space*/
|
| |
+ static char *
|
| |
+ compress_string_separators (const char *str, bool is_pattern)
|
| |
+ {
|
| |
+ size_t strippedlen;
|
| |
+ size_t i;
|
| |
+ char *outbuf;
|
| |
+ bool first;
|
| |
+ bool changed;
|
| |
+
|
| |
+ changed = false;
|
| |
+ first = false;
|
| |
+ strippedlen = 0;
|
| |
+ outbuf = xmalloc (strlen (str) + 1);
|
| |
+
|
| |
+ for (i = 0; str[i]; i++)
|
| |
+ {
|
| |
+ char cnt;
|
| |
+
|
| |
+ cnt = str[i];
|
| |
+ if (isspace (cnt) || (ispunct (cnt) && cnt != '@' &&
|
| |
+ (!is_pattern || strchr (FNMATCH_CHARS, cnt) == NULL)))
|
| |
+ {
|
| |
+ if (first != false)
|
| |
+ {
|
| |
+ changed = true;
|
| |
+ continue;
|
| |
+ }
|
| |
+ if (cnt != ' ')
|
| |
+ {
|
| |
+ cnt = ' ';
|
| |
+ changed = true;
|
| |
+ }
|
| |
+ first = true;
|
| |
+ }
|
| |
+ else
|
| |
+ first = false;
|
| |
+
|
| |
+ outbuf[strippedlen++] = cnt;
|
| |
+ }
|
| |
+
|
| |
+ if (!changed)
|
| |
+ {
|
| |
+ free (outbuf);
|
| |
+ return NULL;
|
| |
+ }
|
| |
+
|
| |
+ outbuf[strippedlen] = '\0';
|
| |
+ return outbuf;
|
| |
+ }
|
| |
+
|
| |
/* Write STRING to stdout, replace unprintable characters with '?' */
|
| |
static void
|
| |
write_quoted (const char *string)
|
| |
@@ -432,7 +687,32 @@
|
| |
else
|
| |
matching = path;
|
| |
if (!string_matches_pattern (matching))
|
| |
- goto done;
|
| |
+ {
|
| |
+ char *altered_matching;
|
| |
+ bool matched;
|
| |
+
|
| |
+ altered_matching = NULL;
|
| |
+ matched = false;
|
| |
+ if (conf_ingore_separators != false)
|
| |
+ altered_matching = compress_string_separators (matching, false);
|
| |
+ #if HAVE_ICONV
|
| |
+ if (conf_transliterate != false)
|
| |
+ {
|
| |
+ char *old_altered = altered_matching;
|
| |
+ if (altered_matching)
|
| |
+ matching = altered_matching;
|
| |
+ altered_matching = transliterate_string (matching);
|
| |
+ free (old_altered);
|
| |
+ }
|
| |
+ #endif
|
| |
+ if (altered_matching != NULL)
|
| |
+ {
|
| |
+ matched = string_matches_pattern (altered_matching);
|
| |
+ free (altered_matching);
|
| |
+ }
|
| |
+ if (!matched)
|
| |
+ goto done;
|
| |
+ }
|
| |
/* Visible? */
|
| |
if (*visible == -1)
|
| |
*visible = check_directory_perms (path) == 0;
|
| |
@@ -632,6 +912,13 @@
|
| |
" -h, --help print this help\n"
|
| |
" -i, --ignore-case ignore case distinctions when matching "
|
| |
"patterns\n"
|
| |
+ " -p, --ignore-spaces ignore punctuation and spaces when "
|
| |
+ "matching patterns\n"
|
| |
+ #if HAVE_ICONV
|
| |
+ " -t, --transliterate ignore accents using iconv "
|
| |
+ "transliteration when\n"
|
| |
+ " matching patterns\n"
|
| |
+ #endif
|
| |
" -l, --limit, -n LIMIT limit output (or counting) to LIMIT "
|
| |
"entries\n"
|
| |
" -m, --mmap ignored, for backward compatibility\n"
|
| |
@@ -669,6 +956,8 @@
|
| |
{ "follow", no_argument, NULL, 'L' },
|
| |
{ "help", no_argument, NULL, 'h' },
|
| |
{ "ignore-case", no_argument, NULL, 'i' },
|
| |
+ { "ignore-spaces", no_argument, NULL, 'p' },
|
| |
+ { "transliterate", no_argument, NULL, 't' },
|
| |
{ "limit", required_argument, NULL, 'l' },
|
| |
{ "mmap", no_argument, NULL, 'm' },
|
| |
{ "quiet", no_argument, NULL, 'q' },
|
| |
@@ -691,7 +980,7 @@
|
| |
{
|
| |
int opt, idx;
|
| |
|
| |
- opt = getopt_long (argc, argv, "0AHPLSVbcd:ehil:mn:qr:sw", options, &idx);
|
| |
+ opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitpl:mn:qr:sw", options, &idx);
|
| |
switch (opt)
|
| |
{
|
| |
case -1:
|
| |
@@ -772,6 +1061,14 @@
|
| |
conf_ignore_case = true;
|
| |
break;
|
| |
|
| |
+ case 't':
|
| |
+ conf_transliterate = true;
|
| |
+ break;
|
| |
+
|
| |
+ case 'p':
|
| |
+ conf_ingore_separators = true;
|
| |
+ break;
|
| |
+
|
| |
case 'l': case 'n':
|
| |
{
|
| |
char *end;
|
| |
@@ -822,6 +1119,22 @@
|
| |
error (EXIT_FAILURE, 0,
|
| |
_("non-option arguments are not allowed with --%s"),
|
| |
conf_statistics != false ? "statistics" : "regexp");
|
| |
+ if (conf_ingore_separators != false && conf_match_regexp != false)
|
| |
+ error (EXIT_FAILURE, 0,
|
| |
+ _("ignore-spaces is not supported when using regexp"));
|
| |
+ if (conf_transliterate != false)
|
| |
+ {
|
| |
+ #if HAVE_ICONV
|
| |
+ iconv_context = iconv_open ("ASCII//TRANSLIT", nl_langinfo (CODESET));
|
| |
+ if (iconv_context == (iconv_t) -1)
|
| |
+ error (EXIT_FAILURE, errno, _("can not do transliteration between " \
|
| |
+ "these locales: `%s' and `ASCII'"),
|
| |
+ nl_langinfo (CODESET));
|
| |
+ #else
|
| |
+ error (EXIT_FAILURE, errno, _("transliteration support is not supported" \
|
| |
+ "by this build of %s"), program_name);
|
| |
+ #endif
|
| |
+ }
|
| |
}
|
| |
|
| |
/* Parse arguments in ARGC, ARGV. Exit on error. */
|
| |
@@ -834,6 +1147,30 @@
|
| |
string_list_append (&conf_patterns, argv[i]);
|
| |
if (conf_statistics == false && conf_patterns.len == 0)
|
| |
error (EXIT_FAILURE, 0, _("no pattern to search for specified"));
|
| |
+ if (conf_transliterate != false || conf_ingore_separators != false)
|
| |
+ {
|
| |
+ char *altered_pattern;
|
| |
+ size_t patterns_len = conf_patterns.len;
|
| |
+
|
| |
+ for (i = 0; i < patterns_len; i++)
|
| |
+ {
|
| |
+ if (conf_ingore_separators != false)
|
| |
+ {
|
| |
+ altered_pattern =
|
| |
+ compress_string_separators (conf_patterns.entries[i], true);
|
| |
+ if (altered_pattern)
|
| |
+ conf_patterns.entries[i] = altered_pattern;
|
| |
+ }
|
| |
+ #if HAVE_ICONV
|
| |
+ if (conf_transliterate != false)
|
| |
+ {
|
| |
+ altered_pattern = transliterate_string (conf_patterns.entries[i]);
|
| |
+ if (altered_pattern)
|
| |
+ string_list_append (&conf_patterns, altered_pattern);
|
| |
+ }
|
| |
+ #endif
|
| |
+ }
|
| |
+ }
|
| |
conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len,
|
| |
sizeof (*conf_patterns.entries));
|
| |
if (conf_match_regexp != false)
|
| |
@@ -871,7 +1208,7 @@
|
| |
for (i = 0; i < conf_patterns.len; i++)
|
| |
{
|
| |
conf_patterns_simple[i] = strpbrk (conf_patterns.entries[i],
|
| |
- "*?[\\]") == NULL;
|
| |
+ FNMATCH_CHARS) == NULL;
|
| |
if (conf_patterns_simple[i] != false)
|
| |
conf_have_simple_pattern = true;
|
| |
}
|
| |
@@ -1042,6 +1379,10 @@
|
| |
handle_dbpath_entry (conf_dbpath.entries[i]);
|
| |
}
|
| |
done:
|
| |
+ #if HAVE_ICONV
|
| |
+ if (conf_transliterate != false && iconv_context)
|
| |
+ iconv_close (iconv_context);
|
| |
+ #endif
|
| |
if (conf_output_count != false)
|
| |
printf ("%ju\n", matches_found);
|
| |
if (conf_statistics != false || matches_found != 0)
|
| |
This allows locating files while ignoring in-word separators, such as punctuation and spaces, so searching for "foo-bar" is like searching for "foo bar", "foo_bar" or "foo?bar".
This is not compatible with regex.
This PR has https://pagure.io/mlocate/pull-request/41 as a prerequisite.