/*
 * Patch for ircd/match.c: add an "rfcmatch" mode to the wildcard matchers.
 *
 * What the hunks below do:
 *  - Define likely()/unlikely() as wrappers around __builtin_expect and use
 *    unlikely() as a branch hint throughout the mask comparison loop.
 *  - Turn mmatch() into _mmatch(old_mask, new_mask, rfcmatch) and re-add
 *    mmatch() as a wrapper that calls it with rfcmatch = 1.
 *  - Turn match() into _match(mask, name, rfcmatch) and re-add match() as a
 *    wrapper that calls it with rfcmatch = 1.
 *  - In _mmatch(), drop the `wild' flag: ma/na now start as NULL, and a NULL
 *    ma means there is no '*' position to backtrack to.
 *  - In _mmatch(), return early when old_mask is exactly "*" (returns 0) or
 *    when new_mask is exactly "*" (returns 1).
 *  - rfcmatch != 0: in _mmatch() a backslash escapes only '*' and '?'; in
 *    _match() a backslash is compared like any ordinary character.
 *    rfcmatch == 0: a backslash escapes whatever character follows it.
 *
 * NOTE(review): before this patch match() always treated a backslash as an
 * escape; the new wrapper passes rfcmatch = 1, so that branch is reachable
 * only through _match(..., 0). Confirm this behaviour change is intended.
 *
 * NOTE(review): in _mmatch(), `--m >= old_mask' can step m to one element
 * before old_mask when the mask starts with '*'. The pointer is not
 * dereferenced in that case (the && short-circuits), but merely forming it
 * is undefined behaviour in ISO C.
 *
 * NOTE(review): _mmatch()/_match() are `__inline__' without `static' and
 * use leading-underscore names; TODO confirm the intended linkage under the
 * project's compiler flags.
 *
 * NOTE(review): line breaks and indentation in this copy were reconstructed
 * from a whitespace-collapsed version. The per-hunk line counts agree with
 * the hunk headers, but context whitespace is a best guess; apply with
 * whitespace-insensitive matching (e.g. patch -l) if needed.
 */
diff -r 19854df2b7c2 ircd/match.c
--- a/ircd/match.c	Mon Jan 26 18:42:26 2009 +0000
+++ b/ircd/match.c	Mon Jan 26 18:48:52 2009 +0000
@@ -27,6 +27,9 @@
 #include "ircd_string.h"
 #include "ircd_snprintf.h"
 
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
 /*
  * mmatch()
  *
@@ -62,53 +65,73 @@
  *
  * @param[in] old_mask One wildcard mask.
  * @param[in] new_mask Another wildcard mask.
+ * @param[in] rfcmatch Non-zero: backslash escapes only '*' and '?'; zero: it escapes any character.
  * @return Zero if \a old_mask is a superset of \a new_mask, non-zero otherwise.
  */
-int mmatch(const char *old_mask, const char *new_mask)
+__inline__ int _mmatch(const char *old_mask, const char *new_mask, int rfcmatch)
 {
   const char *m = old_mask;
   const char *n = new_mask;
-  const char *ma = m;
-  const char *na = n;
-  int wild = 0;
-  int mq = 0, nq = 0;
+  /* Note that ma / na never point to a character escaped by a backslash. */
+  const char *ma = NULL; /* Remembered m for backtracking. */
+  const char *na = NULL;
+  int mq = 0, nq = 0; /* Is *m / *n escaped? */
+  int match;
+
+  if ( m[0] == '*' && m[1] == '\0' ) {
+    return 0;
+  } else if ( n[0] == '*' && n[1] == '\0' ) {
+    return 1;
+  }
 
   while (1)
   {
-    if (*m == '*')
+    if (unlikely(*m == '*'))
     {
+      /* Optimization: Skip redundant *'s */
       while (*m == '*')
         m++;
-      wild = 1;
+      /* And remember this position for backtracking. */
       ma = m;
       na = n;
     }
 
-    if (!*m)
+    if (unlikely(!*m))
     {
       if (!*n)
         return 0;
+      /* This construct speeds up matches of patterns ending with a *
+       * followed by any number of ?. The tricky part is figuring
+       * out whether or not that * was escaped. */
       for (m--; (m > old_mask) && (*m == '?'); m--)
-        ;
-      if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
-        return 0;
-      if (!wild)
+        ; /* Skip trailing ?'s */
+      if (*m == '*') {
+        if ((--m >= old_mask) && (*m != '\\'))
+          return 0;
+        /* Now if there's an odd number of backslashes, the for loop
+         * breaks out and we backtrack. */
+        if (!rfcmatch) /* In rfc, backslashes can't be escaped. */
+          for(--m; (m >= old_mask) && (*m == '\\'); m--)
+            if ((--m >= old_mask) && (*m != '\\'))
+              return 0;
+      }
+      if (!ma)
         return 1;
       m = ma;
-
-      /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
-      if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
+      /* skip one escaped character */
+      if (*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?'))
         ++na;
-
       n = ++na;
     }
-    else if (!*n)
+
+    if (unlikely(!*n))
     {
-      while (*m == '*')
+      while (unlikely(*m == '*'))
         m++;
       return (*m != 0);
     }
-    if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
+
+    if (unlikely(*m == '\\' && (!rfcmatch || m[1] == '*' || m[1] == '?')))
     {
       m++;
       mq = 1;
@@ -116,8 +139,7 @@
     else
       mq = 0;
 
-    /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
-    if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
+    if (unlikely(*n == '\\' && (!rfcmatch || n[1] == '*' || n[1] == '?')))
     {
       n++;
       nq = 1;
@@ -126,45 +148,47 @@
       nq = 0;
 
     /*
-     * This `if' has been changed compared to match() to do the following:
-     * Match when:
-     *   old (m)         new (n)         boolean expression
-     *    *              any             (*m == '*' && !mq) ||
-     *    ?              any except '*'  (*m == '?' && !mq && (*n != '*' || nq)) ||
-     * any except * or ?  same as m      (!((*m == '*' || *m == '?') && !mq) &&
-     *                                     ToLower(*m) == ToLower(*n) &&
-     *                                       !((mq && !nq) || (!mq && nq)))
-     *
-     * Here `any' also includes \* and \? !
-     *
-     * After reworking the boolean expressions, we get:
-     * (Optimized to use boolean short-circuits, with most frequently occurring
-     *  cases upfront (which took 2 hours!)).
+     * There was fancy short-circuit logic here. It has been replaced by the
+     * plain branches below, which are probably no slower. Nobody will notice
+     * in any case. -- BP
      */
-    if ((*m == '*' && !mq) ||
-        ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
-        (*m == '?' && !mq && (*n != '*' || nq)))
+    if (unlikely(mq)) { /* m is quoted, match the exact same, or the
+                         * same character if quoting is irrelevant. */
+      match = (*m == *n && (nq ||
+        (*n != '*' && *n != '?' && ToUpper(*n) == ToLower(*n))));
+    } else if (unlikely(*m == '?')) { /* m is '?', match anything but unquoted '*' */
+      match = (*n != '*' || nq);
+    } else if (unlikely(*m == '*')) { /* m is '*', match. */
+      match=1;
+    } else /* m is neither quoted nor special */
+    {
+      match = (ToLower(*m) == ToLower(*n));
+    }
+
+    if (unlikely(match))
     {
       if (*m)
         m++;
       if (*n)
         n++;
     }
-    else
+    else
     {
-      if (!wild)
+      if (unlikely(!ma))
         return 1;
       m = ma;
-
-      /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
-      if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
+      /* skip one escaped character */
+      if (unlikely(*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?')))
         ++na;
-
       n = ++na;
     }
   }
 }
 
+int mmatch(const char *old_mask, const char *new_mask) {
+  return _mmatch(old_mask, new_mask, 1);
+}
+
 /*
  * Compare if a given string (name) matches the given
  * mask (which can contain wild cards: '*' - match any
@@ -186,7 +210,7 @@
  * @param[in] name String to check against \a mask.
  * @return Zero if \a mask matches \a name, non-zero if no match.
  */
-int match(const char *mask, const char *name)
+__inline__ int _match(const char *mask, const char *name, int rfcmatch)
 {
   const char *m = mask, *n = name;
   const char *m_tmp = mask, *n_tmp = name;
@@ -205,10 +229,14 @@
         return 1;
       break;
     case '\\':
-      m++;
       /* allow escaping to force capitalization */
-      if (*m++ != *n++)
-        goto backtrack;
+      if (!rfcmatch) {
+        m++;
+        if (*m++ != *n++)
+          goto backtrack;
+      } else {
+        goto fallthrough;
+      }
       break;
     case '*': case '?':
       for (star_p = 0; ; m++) {
@@ -234,6 +262,7 @@
       }
       /* and fall through */
     default:
+    fallthrough:
       if (!*n)
         return *m != '\0';
       if (ToLower(*m) != ToLower(*n))
@@ -244,6 +273,11 @@
   }
 }
 
+int match(const char *mask, const char *name)
+{
+  return _match(mask, name, 1);
+}
+
 /*
  * collapse()
  * Collapse a pattern string into minimal components.