kjs Library API Documentation

regexp.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2001 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Lesser General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Lesser General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Lesser General Public
00017  *  License along with this library; if not, write to the Free Software
00018  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #include "regexp.h"
00023 
00024 #include <stdio.h>
00025 #include <stdlib.h>
00026 #include <string.h>
00027 
00028 using namespace KJS;
00029 
00030 RegExp::RegExp(const UString &p, int f)
00031   : pattern(p), flgs(f), m_notEmpty(false)
00032 {
00033 #ifdef HAVE_PCRE
00034   int pcreflags = 0;
00035   const char *perrormsg;
00036   int errorOffset;
00037 
00038   if (flgs & IgnoreCase)
00039     pcreflags |= PCRE_CASELESS;
00040 
00041   if (flgs & Multiline)
00042     pcreflags |= PCRE_MULTILINE;
00043 
00044   pcregex = pcre_compile(p.ascii(), pcreflags,
00045              &perrormsg, &errorOffset, NULL);
00046 #ifndef NDEBUG
00047   if (!pcregex)
00048     fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg);
00049 #endif
00050 
00051 #ifdef PCRE_INFO_CAPTURECOUNT
00052   // Get number of subpatterns that will be returned
00053   int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
00054   if (rc != 0)
00055 #endif
00056     nrSubPatterns = 0; // fallback. We always need the first pair of offsets.
00057 
00058 #else /* HAVE_PCRE */
00059 
00060   nrSubPatterns = 0; // determined in match() with POSIX regex.
00061   int regflags = 0;
00062 #ifdef REG_EXTENDED
00063   regflags |= REG_EXTENDED;
00064 #endif
00065 #ifdef REG_ICASE
00066   if ( f & IgnoreCase )
00067     regflags |= REG_ICASE;
00068 #endif
00069 
00070   //NOTE: Multiline is not feasible with POSIX regex.
00071   //if ( f & Multiline )
00072   //    ;
00073   // Note: the Global flag is already handled by RegExpProtoFunc::execute
00074 
00075   regcomp(&preg, p.ascii(), regflags);
00076   /* TODO check for errors */
00077 #endif
00078 
00079 }
00080 
00081 RegExp::~RegExp()
00082 {
00083 #ifdef HAVE_PCRE
00084   if (pcregex)
00085     pcre_free(pcregex);
00086 #else
00087   /* TODO: is this really okay after an error ? */
00088   regfree(&preg);
00089 #endif
00090 }
00091 
00092 UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
00093 {
00094   if (i < 0)
00095     i = 0;
00096   if (ovector)
00097     *ovector = 0L;
00098   int dummyPos;
00099   if (!pos)
00100     pos = &dummyPos;
00101   *pos = -1;
00102   if (i > s.size() || s.isNull())
00103     return UString::null;
00104 
00105 #ifdef HAVE_PCRE
00106   CString buffer(s.cstring());
00107   int bufferSize = buffer.size();
00108   int ovecsize = (nrSubPatterns+1)*3; // see pcre docu
00109   if (ovector) *ovector = new int[ovecsize];
00110   if (!pcregex)
00111     return UString::null;
00112 
00113   if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i,
00114                 m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0, // see man pcretest
00115                 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00116   {
00117     // Failed to match.
00118     if ((flgs & Global) && m_notEmpty && ovector)
00119     {
00120       // We set m_notEmpty ourselves, to look for a non-empty match
00121       // (see man pcretest or pcretest.c for details).
00122       // So this is not the end. We want to try again at i+1.
00123       // We won't be at the end of the string - that was checked before setting m_notEmpty.
00124       fprintf(stderr, "No match after m_notEmpty. +1 and keep going.\n");
00125       m_notEmpty = 0;
00126       if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i+1, 0,
00127                     ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00128         return UString::null;
00129     }
00130     else // done
00131       return UString::null;
00132   }
00133 
00134   if (!ovector)
00135     return UString::null; // don't rely on the return value if you pass ovector==0
00136 #else
00137   const int maxMatch = 10;
00138   regmatch_t rmatch[maxMatch];
00139 
00140   char *str = strdup(s.ascii()); // TODO: why ???
00141   if (regexec(&preg, str + i, maxMatch, rmatch, 0)) {
00142     free(str);
00143     return UString::null;
00144   }
00145   free(str);
00146 
00147   if (!ovector) {
00148     *pos = rmatch[0].rm_so + i;
00149     return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
00150   }
00151 
00152   // map rmatch array to ovector used in PCRE case
00153   nrSubPatterns = 0;
00154   for(int j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++)
00155       nrSubPatterns++;
00156   int ovecsize = (nrSubPatterns+1)*3; // see above
00157   *ovector = new int[ovecsize];
00158   for (int j = 0; j < nrSubPatterns + 1; j++) {
00159     if (j>maxMatch)
00160       break;
00161     (*ovector)[2*j] = rmatch[j].rm_so + i;
00162     (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
00163   }
00164 #endif
00165 
00166   *pos = (*ovector)[0];
00167 #ifdef HAVE_PCRE  // TODO check this stuff in non-pcre mode
00168   if ( *pos == (*ovector)[1] && (flgs & Global) && *pos != bufferSize )
00169   {
00170     // empty match, not at end of string.
00171     // Next try will be with m_notEmpty=true
00172     m_notEmpty=true;
00173   }
00174 #endif
00175   return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
00176 }
00177 
#if 0 // unused
/**
 * Reports whether this expression matches anywhere in @p s.
 * The second (unnamed) parameter is ignored.
 */
bool RegExp::test(const UString &s, int)
{
#ifdef HAVE_PCRE
  const int vectorSize = 300;
  int offsets[vectorSize];
  CString subject(s.cstring());

  // A null string never matches.
  if (s.isNull())
    return false;

  // Everything except an explicit "no match" result counts as a match.
  const int rc = pcre_exec(pcregex, NULL, subject.c_str(), subject.size(), 0,
                           0, offsets, vectorSize);
  return rc != PCRE_ERROR_NOMATCH;

#else

  // regexec() returns 0 on a match; no offsets are requested.
  char *copy = strdup(s.ascii());
  const bool matched = (regexec(&preg, copy, 0, 0, 0) == 0);
  free(copy);

  return matched;
#endif
}
#endif
KDE Logo
This file is part of the documentation for kdelibs Version 3.1.4.
Documentation copyright © 1996-2002 the KDE developers.
Generated on Sun Feb 27 22:15:18 2005 by doxygen 1.3.4 written by Dimitri van Heesch, © 1997-2001