This paper mainly implements the algorithm of the string to match the LIKE. In the match, the match in the SQL Server is mainly manifested as the process of two wildcards, respectively, "_" represents a character, "%" represents any character. Since "%" is arbitrarily in the matching process, it is completely matched, and the wildcard "_" match should not participate in matching operations together, so we decided to put the substrings in "%" before matching, Segment matching, obviously reduces the difficulty of matching algorithm, explaining the implementation process of the algorithm: (post-attached to achieve source)
1. Determine the first "%" position,
Purpose: Determine mode matching
A> is the beginning of "%", no left match (left match, the first character of the substrings must be consistent with the first character of the original string)
B> Do not start with "%"
2. Perform mode matching of KMP * algorithm
The KMP algorithm cannot fully implement the matching algorithm mentioned in this article, we must correct this, mainly the mode factor is not suitable, and must think that "_" is consistent with the previous arbitrary character, so "_" There will be fewer, the less likely, the less likely, of course, the matching speed is faster than the mode of the teaching course.
3. Continue the matching of the next subtrial section.
The source code of the algorithm is provided below, divided into two functions,
_Strat
Find functions for KMP * mode string, which has its instructions before the function.
2. _Strlike
For the implementation of the Like, the key to the _strat mode string matching function is the key to the segment of the mode string to reduce the difficulty of matching.
// Function Name: INT _STRAT
// char * chtext,
// char * chpattern,
// int Nbegpos,
// int Nlen
// bool blef
/ / Implementation: Mode Search
// Effect on global variables: no
// Parameter Description:
// chtext
// chpattern mode string
// NBEGPOS start position
// NLEN's original string relative length
// BLEFT is left aligned (ie the first character must be consistent)
// Return the result Description: Actual location
// Waiting for one: The number of revolutions must not be greater than Nlen - Len (chpattern), that is, it cannot cause full match after the rebound.
// Standby 2: Calculation mode string and string search code merge, reduce calculation
Int _strat (char * chtext, char * chpattern, int nbegpos / * = 0 * /, int Nlen / * = -1 * /, bool bleft / * = false * /)
{
Int npatternlen = _tcslen (chpattern); int ntextlen = _tcslen (chtext);
IF (Nlen> = 0)
{
IF (Nbegpos Nlen Ntextlen = nbegpos nlen; } IF (nbegpos npatternlen> ntextlen || npatternlen> maxlen_pattern) Return -1; IF (npatternlen == 0) Return nbegpos; Else { INT nGenerallen = 0; Short chnext [maxlen_pattern] = {-1}; INT NPATTPOS = 0, NNEXT = -1; IF (! bleft) { / / Generate mode Reverse value While (Npattpos { IF (nNext == -1 || chpattern [npattpos] == '_' || chpattern [npattpos] == chpattern [nnext]) { Npattpos ; NNEXT ; ChnexT [npattpos] = nnext; } Else NNEXT = chnext [nnext]; } } INT nTextPos = nbegpos; Npattpos = 0; // Mode match While (npattpos { IF (npattpos == -1 || chpattern [npattpos] == '_' || chpattern [npattpos] == chtext [nTextPos]) { Npattpos ; NTEXTPOS ; } Else { / / Require left alignment, not allowed to retreat (definitely not left alignment) IF (bleft) Return -1; Else Npattpos = chnext [npattpos]; } } / / Decision mode string has been fully matched, otherwise returns -1 IF (npattpos == npatternlen) Return nTextpos - NPATTPOS; Else Return -1; } } // Function Name: BOOL _STRLIKE // char * chtext, // char * chpattern, // int nbegpos) // Implementation: Two strings matching algorithm, belt accessory // Effect on global variables: no // Parameter Description: // chtext original string // chpattern mode string // NBEGPOS start position // Return the result Description: // = true indicates similar or uniform // = false means not similar or inconsistent Bool _Strlike (char * chtext, char * chpattern, int nbegpos / * = 0 * /) { Bool bleftmatch = true, blast = false; INT NTEXTLEN = _TCSLEN (ChText); // is the most basic match, that is, the presence of the mode string, compare IF (_TCSLEN (chpattern) == 0) IF (_tcslen (chtext) == 0) Return True; Else Return False; DO { Char * chfirstpattern, * chsecondpattern; IF (chpattern [0] == '%') { DO { Chpattern ; WHILE (chpattern [0] == '%'); IF (chpattern == null || _tcslen (chpattern) == 0) Return True; BLEFTMATCH = FALSE; } Else BLEFTMATCH = TRUE; // Initialization mode string Chsecondpattern = _tcschr (chpattern, '%'); INT NPATTERNLEN; IF (chsecondpattern == null) { Blast = True; Npatternlen = _tcslen (chpattern); IF (! bleftmatch) { // If you start with a%, if you don't have the remaining mode string, just consider the way the right alignment matches (actually left aligned) IF (NBEGPOS NPATTERNLEN <= NTEXTLEN) { Nbegpos = nTextlen - Npatternlen; BLEFTMATCH = TRUE; } Else Return False; } Else IF (NBEGPOS NPATTERNLEN! = NTEXTLEN) Return False; } Else { // mode string must not be longer than the original string Npatternlen = chsecondpattern - chpattern; IF (NBEGPOS NPATTERNLEN> NTEXTLEN) Return False; } // Initialization mode string and modification remaining string CHFIRSTPATTERN = New Char [Npatternlen 1]; Memcpy (chfirstpattern, chpattern, npatternlen); CHFIRSTPATTERN [NPATTERNLEN] = 0; Chpattern = chsecondpattern; Int npos = _strat (chText, chfirstpattern, nbegpos, bleftmatch? npatternlen: NTextlen - Nbegpos, Bleftmatch); Delete chfirstpattern; IF (NPOS <0) { Return False; } Else { // Set the starting point of the next lookup location IF (BleftMatch) { IF (npos! = nbegpos) Return false;} Else Nbegpos = npos; IF (Blast) { IF (npatternlen npos == ntextlen) Return True; Else Return False; } Else NBEGPOS = NPATTERNLEN } WHILE (TRUE); }