Решение, предоставленное Mehrdad, имеет экспоненциальное время выполнения, потому что оно использует возврат.Требуется секунда, чтобы понять, что "*a*??????*a*?????????a???????????????", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
соответствует.Также нет способа сопоставить строку с '*' или '?'в нем
Вот реализация O (nm), которую я придумал, которая использует O (n) памяти, где n - длина выражения, m - длина строкиОн также поддерживает экранирование?, * И \.
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/**
* Wildcard matching.
* See for example
* http://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/find_c_search_wildcard.mspx?mfr=true
*
* * matches any amount of any characters,
* ? matches any single character.
* c matches c.
* No escaping of * and ? implemented (these are not allowed in windows filenames anyways).
*
* Backtracking O(2^n) implementation.
*
* from /2301597/sopostavlenie-imeni-faila-s-podstanovochnym-znakom
* http://stackoverflow.com/a/12231681/524504
*/
template<class Pattern, class Text>
bool wildcard(
Pattern const pat_begin,
Pattern const pat_end,
Text text_begin,
Text const text_end)
{
ptrdiff_t const pat_size = pat_end - pat_begin;
/* initial pattern position stack (offsets into the pattern string) and its size c */
ptrdiff_t stackbuf[64];
size_t c = sizeof(stackbuf) / sizeof(*stackbuf);
/* Stack base. Can be realloc'ed to some new memory location if we need more */
ptrdiff_t *p = stackbuf;
/* pointer to the location in the stack */
size_t n = 0;
p[n++] = 0; /* position 0 in the stack not used */
/* text_begin updated to skip everything successfully consumed */
while (n > 0 && text_begin != text_end)
{
for (size_t i = 0; i < n; i++)
{
/* if we're at the end of the pattern, but not at the end of the
* string, we might have done something wrong.
*/
if (p[i] == pat_size)
{
p[i--] = p[--n];
continue;
}
/* current pattern character */
switch (*(pat_begin + p[i]))
{
case '?': ++p[i]; break; /* simply advance pattern pointer */
case '*':
ptrdiff_t off;
off = p[i];
while (off < pat_size &&
*(pat_begin + off) == '*')
{ ++off; }
/* if the stack is full, reallocate */
if (n == c)
{
ptrdiff_t const *const old = p;
c *= 2;
/* assert positive size
* stack size never reduced anyways?
*/
if (c == 0) { ++c; }
size_t const size = c * sizeof(*p);
/* cannot use realloc to copy original stack
* (ptr in realloc must be
* "Pointer to a memory block previously
* allocated with malloc, calloc or realloc."
* must do manually
*/
p = (ptrdiff_t *)realloc(
old == stackbuf ? NULL : p,
size);
if (old == stackbuf)
{ memcpy(p, old, n * sizeof(*old)); }
}
/* store offset */
p[n++] = off;
break;
default: /* a normal character in the pattern */
/* must be matched exactly */
if (*(pat_begin + p[i]) == *text_begin)
{ ++p[i]; } /* advance pattern pointer */
else /* if not, backtrack */
{ p[i--] = p[--n]; }
break;
}
}
++text_begin;
}
bool success = false;
if (text_begin == text_end)
{
while (!success && n > 0)
{
--n;
while (p[n] != pat_size &&
*(pat_begin + p[n]) == '*')
{ ++p[n]; }
if (p[n] == pat_size)
{ success = true; }
}
}
/* need to free stack if it was reallocated */
if (p != stackbuf) { free(p); }
return success;
}
bool wildcard(char const *const pattern, char const *const text)
{
return wildcard(
pattern,
pattern + (pattern ? strlen(pattern) : 0),
text,
text + (text ? strlen(text) : 0));
}
bool wildcard(wchar_t const *const pattern, wchar_t const *const text)
{
return wildcard(
pattern,
pattern + (pattern ? wcslen(pattern) : 0),
text,
text + (text ? wcslen(text) : 0));
}
/**
* Virtual Machine Style Regular Expression parsing/NFA emulation
* used for wildcard matching. O(nm) algorithm.
*
* See http://swtch.com/~rsc/regexp/ for more on efficient RegEx parsing.
*
* Copyright (c) March 29, 2013 Paul Frischknecht
* Can be distributed under the MIT licence, see bottom of file.
*/
//#define wildcard_fast_DEBUG /* define to make the alogorithm printf what its doing */
/**
* Instructions are:
*
* star
* This launches a new thread at the current position and continues with the
* current thread at the next position accepting any character.
* char c
* Accepts exactly the character c
* anychar
* Accepts any character.
* end
* Accepts the end of the input string.
*/
enum InstructionType {
End = 0,
Star = 1,
Char = 2,
AnyChar = 3
};
struct Instruction {
InstructionType i;
int c; /* the caracter this instruction matches - undefined for non Char */
int threads; /* threads active in the given instruction */
/*
* storing this here allows us to find out wether there is a thread
* active at a given instruction in O(1)
*/
};
/**
* Wildcard (file path) matching.
* See for example
* http://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/find_c_search_wildcard.mspx?mfr=true
*
* * matches any amount of any characters,
* ? matches any single character.
* c matches c.
*
* Escaping of '*', '?' and '\' via '\*', '\?' and '\\' implemented.
* All other bytes are recognized directly as is.
* The string may not end in a single (uneven amount of) '\'.
*
* If text_end is 0, the text is assumed to be 0 terminated.
* Same for pattern_end. The end is exclusive.
*
* @return A pointer to the character after the last character matched.
*
* Virtual machine O(nm) implementation, see http://swtch.com/~rsc/regexp/regexp2.html
*
* TODO Factor out the precompilation step to speed up matching multiple strings
* against the same expression.
*/
template<class Pattern, class Text>
Text wildcard_fast(
Pattern const pat_begin,
Pattern const pat_end,
Text const text_begin,
Text const text_end)
{
/*
* give some reasonable default program size so we don't have to call
* malloc in most cases
*/
#define DEFAULTonstack_program_SIZE 256
Instruction onstack_program[DEFAULTonstack_program_SIZE];
/* this stores the current run and next run thread list, alternating */
/* there are as most as many threads as instructions */
Instruction* onstack_threads[DEFAULTonstack_program_SIZE*2];
Instruction** threads = onstack_threads;
Instruction* program = onstack_program;
int program_size = sizeof(onstack_program)/sizeof(*program);
Instruction* program_last_instruction = program + program_size - 1;
/* program and pattern pointers */
Instruction* pp = program;
Pattern patp = pat_begin;
/* compile */
while ((pat_end == 0 && *patp != 0) || (pat_end != 0 && patp != pat_end)) {
/* need more space */
if (pp == program_last_instruction) {
Instruction* old_program = program;
Instruction** old_threads = threads;
int old_program_size = program_size;
program_size *= 2;
program = (Instruction*) malloc(program_size*sizeof(*program));
threads = (Instruction**)malloc(program_size*sizeof(*threads)*2);
memcpy(program, old_program, old_program_size*sizeof(*program));
if (old_program != onstack_program) {
free(old_program); free(old_threads);
}
program_last_instruction = program + program_size - 1;
pp = pp - old_program + program;
}
/* parse pattern */
switch (*patp) {
case '*':
pp->i = Star;
/* Optimize multiple stars away */
while ((pat_end == 0 || patp+1 != pat_end) && *(patp+1) == '*')
patp++;
break;
case '?':
pp->i = AnyChar;
break;
case '\\':
pp->i = Char;
pp->c = *(++patp); /* assumes string does not end in \ */
break;
default:
pp->i = Char;
pp->c = *patp;
break;
}
pp->threads = 0;
pp++;
patp++;
}
/* add the End instruction at the end */
program_last_instruction = pp;
pp->i = End;
pp->threads = 0;
/* run */
Text sp = text_begin; /* input string pointer */
int n = 1, c = 0; /* next and current index */
int threadcount[2];
/* initialize */
threadcount[c] = 1;
threads[0] = program;
threads[0]->threads++;
/* run over text */
while ((text_end == 0 && *sp != 0) || (text_end != 0 && sp != text_end)) {
/* unless recreated, all threads will die */
threadcount[n] = 0;
/* run over threads */
for (int i = 0; i < threadcount[c]; i++) {
Instruction* inst = threads[2*i+c];
switch (inst->i) {
case End:
/* we may not reach end early */
/* kill this thread without recrating it */
inst->threads--;
continue; /* with for loop */
case Char:
if (*sp != inst->c) {
/* if the character is not matched, kill this thread */
inst->threads--;
continue;
}
break;
case Star:
/* spawn off additional thread at current location */
if (inst->threads == 1) {
/* only if there's noone active there yet */
threads[2*(threadcount[n]++)+n] = inst;
inst->threads++;
}
break;
}
/*
* common actions: increase program counter for current thread,
* decrese amount of threads in last (current) instruction.
*/
inst->threads--;
inst++;
inst->threads++;
/* respawn at new location in next iteration */
threads[2*(threadcount[n]++)+n] = inst;
if (inst->i == Star && (inst+1)->threads == 0) {
/*
* already follow no-match option to give us
* more stuff to do
*/
threads[2*(threadcount[n]++)+n] = inst+1;
(inst+1)->threads++;
}
#ifdef wildcard_fast_DEBUG
for (int i = 0 ; i < threadcount[n]; i++) {
printf("thread %d at %d.\n", i, threads[2*i+n]-program);
}
#endif
}
#ifdef wildcard_fast_DEBUG
const char *ns[] = {
"end",
"star",
"char",
"anychar",
};
for (Instruction* p = program; p->i; p++) {
printf("%d. %s %c (%d threads)\n", p-program, ns[p->i], p->i == Char ? p->c : ' ', p->threads);
}
#endif
/* swap next and current and advance */
n = c;
c = !c;
sp++;
}
/*
* if there is no thread active in the End instruction when the
* end of the input was reached, this was no match
*/
if (program_last_instruction->threads == 0) sp = 0;
if (program != onstack_program) {
/* only need to free if we used malloc */
free(program); free(threads);
}
return sp;
}
char const* wildcard_fast(
char const *const pattern,
char const *const text)
{
return wildcard_fast(
pattern,
(const char*)0,
text,
(const char*)0);
}
wchar_t const* wildcard_fast(
wchar_t const *const pattern,
wchar_t const *const text)
{
return wildcard_fast(
pattern,
(const wchar_t*)0,
text,
(const wchar_t*)0);
}
/* tests */
#ifndef INCLUDE_IMPLEMENTATION
/*
* I just include this file in my projects and define this.
* That works well for simple algorithms like this
*/
#include <stdio.h>
#include <time.h>
int main() {
struct {
char* p;
char* t;
bool expected_result;
} test[] = {
{
"",
"",
true
},
{
"a",
"",
false
},
{
"",
"a",
false
},
{
"a",
"a",
true
},
{
"****.txt",
"hello.txt",
true
},
{
"*.txt",
"hello.tzt",
false
},
{
"*.t?t*",
"hello.tzt",
true
},
{
"*.*",
"hi.there",
true
},
/* the wildcard implementation will fail this as it doesn't understand escaping */
{
"\\*who\\?\\?.*\\\\",
"*who??.there\\",
true
},
/* these take extraordinaryly long on the O(2^n) implementation */
{
"**a*************************??????***a************************?????????a???????????????",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
true
},
/* runs a bit better if we omit the extra *'s. The fast implementation already optimizes these away. */
{
"*a*??????*a*?????????a???????????????",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
true
},
{0,0} /* marks end of tests */
};
int t0 = clock();
const char* result;
const char* r[] = {"no match", "match"};
for (int i = 0; test[i].p; i++) {
printf("=== Test %d: Matching '%s' against '%s'...===\n", i, test[i].t, test[i].p);
/* wildcard_fast */
t0 = clock();
result = wildcard_fast(test[i].p, test[i].t);
printf(" wildcard_fast (took %d ms):\n", clock()-t0);
if (!!result != test[i].expected_result)
printf(" Test failed (reported %s instead of expected %s).\n",
r[!!result], r[test[i].expected_result]);
else if (result) {
printf(" %s\n", test[i].t);
printf(" %*.c\n", result-test[i].t+1, '^');
}
else
printf(" No match.\n");
/* wildcard */
t0 = clock();
result = (const char*)
wildcard(test[i].p, test[i].t);
printf(" wildcard (took %d ms):\n", clock()-t0);
if (!!result != test[i].expected_result)
printf(" Test failed (reported %s instead of expected %s).\n",
r[!!result], r[test[i].expected_result]);
else if (result) printf(" Match.\n");
else printf(" No match.\n");
printf("\n");
}
}
#endif
/*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the
* Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute,
* sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall
* be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
* OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
. Используется подход виртуальной машины.См. http://swtch.com/~rsc/regexp/ для получения дополнительной информации об эффективном анализе регулярного выражения.