kjs Library API Documentation

string_object.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2001 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Lesser General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Lesser General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Lesser General Public
00017  *  License along with this library; if not, write to the Free Software
00018  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #include "value.h"
00023 #include "object.h"
00024 #include "types.h"
00025 #include "interpreter.h"
00026 #include "operations.h"
00027 #include "regexp.h"
00028 #include "regexp_object.h"
00029 #include "string_object.h"
00030 #include "error_object.h"
00031 #include <stdio.h>
00032 #include "string_object.lut.h"
00033 
00034 using namespace KJS;
00035 
00036 // ------------------------------ StringInstanceImp ----------------------------
00037 
00038 const ClassInfo StringInstanceImp::info = {"String", 0, 0, 0};
00039 
00040 StringInstanceImp::StringInstanceImp(const Object &proto)
00041   : ObjectImp(proto)
00042 {
00043   setInternalValue(String(""));
00044 }
00045 
00046 // ------------------------------ StringPrototypeImp ---------------------------
00047 const ClassInfo StringPrototypeImp::info = {"String", &StringInstanceImp::info, &stringTable, 0};
00048 /* Source for string_object.lut.h
00049 @begin stringTable 26
00050   toString      StringProtoFuncImp::ToString    DontEnum|Function   0
00051   valueOf       StringProtoFuncImp::ValueOf DontEnum|Function   0
00052   charAt        StringProtoFuncImp::CharAt  DontEnum|Function   1
00053   charCodeAt        StringProtoFuncImp::CharCodeAt  DontEnum|Function   1
00054   concat        StringProtoFuncImp::Concat  DontEnum|Function   1
00055   indexOf       StringProtoFuncImp::IndexOf DontEnum|Function   2
00056   lastIndexOf       StringProtoFuncImp::LastIndexOf DontEnum|Function   2
00057   match         StringProtoFuncImp::Match   DontEnum|Function   1
00058   replace       StringProtoFuncImp::Replace DontEnum|Function   2
00059   search        StringProtoFuncImp::Search  DontEnum|Function   1
00060   slice         StringProtoFuncImp::Slice   DontEnum|Function   2
00061   split         StringProtoFuncImp::Split   DontEnum|Function   2
00062   substr        StringProtoFuncImp::Substr  DontEnum|Function   2
00063   substring     StringProtoFuncImp::Substring   DontEnum|Function   2
00064   toLowerCase       StringProtoFuncImp::ToLowerCase DontEnum|Function   0
00065   toUpperCase       StringProtoFuncImp::ToUpperCase DontEnum|Function   0
00066 #
00067 # Under here: html extension, should only exist if KJS_PURE_ECMA is not defined
00068 # I guess we need to generate two hashtables in the .lut.h file, and use #ifdef
00069 # to select the right one... TODO. #####
00070   big           StringProtoFuncImp::Big     DontEnum|Function   0
00071   small         StringProtoFuncImp::Small   DontEnum|Function   0
00072   blink         StringProtoFuncImp::Blink   DontEnum|Function   0
00073   bold          StringProtoFuncImp::Bold    DontEnum|Function   0
00074   fixed         StringProtoFuncImp::Fixed   DontEnum|Function   0
00075   italics       StringProtoFuncImp::Italics DontEnum|Function   0
00076   strike        StringProtoFuncImp::Strike  DontEnum|Function   0
00077   sub           StringProtoFuncImp::Sub     DontEnum|Function   0
00078   sup           StringProtoFuncImp::Sup     DontEnum|Function   0
00079   fontcolor     StringProtoFuncImp::Fontcolor   DontEnum|Function   1
00080   fontsize      StringProtoFuncImp::Fontsize    DontEnum|Function   1
00081   anchor        StringProtoFuncImp::Anchor  DontEnum|Function   1
00082   link          StringProtoFuncImp::Link    DontEnum|Function   1
00083 @end
00084 */
00085 // ECMA 15.5.4
00086 StringPrototypeImp::StringPrototypeImp(ExecState *exec,
00087                                        ObjectPrototypeImp *objProto)
00088   : StringInstanceImp(Object(objProto))
00089 {
00090   Value protect(this);
00091   // The constructor will be added later, after StringObjectImp has been built
00092   put(exec,"length",Number(0),DontDelete|ReadOnly|DontEnum);
00093 
00094 }
00095 
00096 Value StringPrototypeImp::get(ExecState *exec, const UString &propertyName) const
00097 {
00098   return lookupGetFunction<StringProtoFuncImp, StringInstanceImp>( exec, propertyName, &stringTable, this );
00099 }
00100 
00101 // ------------------------------ StringProtoFuncImp ---------------------------
00102 
00103 StringProtoFuncImp::StringProtoFuncImp(ExecState *exec, int i, int len)
00104   : InternalFunctionImp(
00105     static_cast<FunctionPrototypeImp*>(exec->interpreter()->builtinFunctionPrototype().imp())
00106     ), id(i)
00107 {
00108   Value protect(this);
00109   put(exec,"length",Number(len),DontDelete|ReadOnly|DontEnum);
00110 }
00111 
00112 bool StringProtoFuncImp::implementsCall() const
00113 {
00114   return true;
00115 }
00116 
00117 // ECMA 15.5.4.2 - 15.5.4.20
00118 Value StringProtoFuncImp::call(ExecState *exec, Object &thisObj, const List &args)
00119 {
00120   Value result;
00121 
00122   // toString and valueOf are no generic function.
00123   if (id == ToString || id == ValueOf) {
00124     KJS_CHECK_THIS( StringInstanceImp, thisObj );
00125 
00126     return String(thisObj.internalValue().toString(exec));
00127   }
00128 
00129   int n, m;
00130   UString u2, u3;
00131   int pos, p0, i;
00132   double d = 0.0;
00133 
00134   UString s = thisObj.toString(exec);
00135 
00136   int len = s.size();
00137   Value a0 = args[0];
00138   Value a1 = args[1];
00139 
00140   switch (id) {
00141   case ToString:
00142   case ValueOf:
00143     // handled above
00144     break;
00145   case CharAt:
00146     pos = a0.toInteger(exec);
00147     if (pos < 0 || pos >= len)
00148       s = "";
00149     else
00150       s = s.substr(pos, 1);
00151     result = String(s);
00152     break;
00153   case CharCodeAt:
00154     pos = a0.toInteger(exec);
00155     if (pos < 0 || pos >= len)
00156       d = NaN;
00157     else {
00158       UChar c = s[pos];
00159       d = (c.high() << 8) + c.low();
00160     }
00161     result = Number(d);
00162     break;
00163   case Concat: {
00164     ListIterator it = args.begin();
00165     for ( ; it != args.end() ; ++it) {
00166         s += it->toString(exec);
00167     }
00168     result = String(s);
00169     break;
00170   }
00171   case IndexOf:
00172     u2 = a0.toString(exec);
00173     if (a1.type() == UndefinedType)
00174       pos = 0;
00175     else
00176       pos = a1.toInteger(exec);
00177     d = s.find(u2, pos);
00178     result = Number(d);
00179     break;
00180   case LastIndexOf:
00181     u2 = a0.toString(exec);
00182     d = a1.toNumber(exec);
00183     if (a1.type() == UndefinedType || KJS::isNaN(d) || KJS::isPosInf(d))
00184       pos = len;
00185     else
00186       pos = a1.toInteger(exec);
00187     if (pos < 0)
00188       pos = 0;
00189     d = s.rfind(u2, pos);
00190     result = Number(d);
00191     break;
00192   case Match:
00193   case Search: {
00194     RegExp *reg, *tmpReg = 0;
00195     RegExpImp *imp = 0;
00196     if (a0.isA(ObjectType) && a0.toObject(exec).inherits(&RegExpImp::info))
00197     {
00198       imp = static_cast<RegExpImp *>( a0.toObject(exec).imp() );
00199       reg = imp->regExp();
00200     }
00201     else
00202     { /*
00203        *  ECMA 15.5.4.12 String.prototype.search (regexp)
00204        *  If regexp is not an object whose [[Class]] property is "RegExp", it is
00205        *  replaced with the result of the expression new RegExp(regexp).
00206        */
00207       reg = tmpReg = new RegExp(a0.toString(exec), RegExp::None);
00208     }
00209     RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->interpreter()->builtinRegExp().imp());
00210     int **ovector = regExpObj->registerRegexp(reg, s);
00211     UString mstr = reg->match(s, -1, &pos, ovector);
00212     if (id == Search) {
00213       result = Number(pos);
00214     } else { // Match
00215       if (mstr.isNull())
00216         return Null(); // no match
00217       if ((reg->flags() & RegExp::Global) == 0) {
00218     // case without 'g' flag is handled like RegExp.prototype.exec
00219     regExpObj->setSubPatterns(reg->subPatterns());
00220     result = regExpObj->arrayOfMatches(exec,mstr);
00221       } else {
00222     // return array of matches
00223     List list;
00224     int lastIndex = 0;
00225     while (pos >= 0) {
00226       list.append(String(mstr));
00227       lastIndex = pos;
00228       pos += mstr.isEmpty() ? 1 : mstr.size();
00229       delete [] *ovector;
00230       mstr = reg->match(s, pos, &pos, ovector);
00231     }
00232     if (imp)
00233       imp->put(exec, "lastIndex", Number(lastIndex), DontDelete|DontEnum);
00234     result = exec->interpreter()->builtinArray().construct(exec, list);
00235       }
00236     }
00237     delete tmpReg;
00238     break;
00239   }
00240   case Replace:
00241     if (a0.type() == ObjectType && a0.toObject(exec).inherits(&RegExpImp::info)) {
00242       RegExpImp* imp = static_cast<RegExpImp *>( a0.toObject(exec).imp() );
00243       RegExp *reg = imp->regExp();
00244       bool global = false;
00245       Value tmp = imp->get(exec,"global");
00246       if (tmp.type() != UndefinedType && tmp.toBoolean(exec) == true)
00247         global = true;
00248 
00249       RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->interpreter()->builtinRegExp().imp());
00250       int lastIndex = 0;
00251       Object o1;
00252       // Test if 2nd arg is a function (new in JS 1.3)
00253       if ( a1.type() == ObjectType && a1.toObject(exec).implementsCall() )
00254         o1 = a1.toObject(exec);
00255       else
00256         u3 = a1.toString(exec); // 2nd arg is the replacement string
00257 
00258       // This is either a loop (if global is set) or a one-way (if not).
00259       do {
00260         int **ovector = regExpObj->registerRegexp( reg, s );
00261         UString mstr = reg->match(s, lastIndex, &pos, ovector);
00262         regExpObj->setSubPatterns(reg->subPatterns());
00263         if (pos == -1)
00264           break;
00265         len = mstr.size();
00266         // special case of empty match
00267         if (len == 0 && lastIndex > 0) {
00268           pos = lastIndex + 1;
00269           if (pos > s.size())
00270             break;
00271         }
00272 
00273         UString rstr;
00274         // Prepare replacement
00275         if ( o1.isNull() )
00276         {
00277           rstr = u3;
00278           bool ok;
00279           // check if u3 matches $1 or $2 etc
00280           for (int i = 0; (i = rstr.find(UString("$"), i)) != -1; i++) {
00281             if (i+1<rstr.size() && rstr[i+1] == '$') {  // "$$" -> "$"
00282               rstr = rstr.substr(0,i) + "$" + rstr.substr(i+2);
00283               continue;
00284             }
00285             // Assume number part is one char exactly
00286             unsigned long pos = rstr.substr(i+1,1).toULong(&ok);
00287             if (ok && pos <= (unsigned)reg->subPatterns()) {
00288               rstr = rstr.substr(0,i)
00289                      + s.substr((*ovector)[2*pos],
00290                                 (*ovector)[2*pos+1]-(*ovector)[2*pos])
00291                      + rstr.substr(i+2);
00292               i += (*ovector)[2*pos+1]-(*ovector)[2*pos] - 1; // -1 offsets i++
00293             }
00294           }
00295         } else // 2nd arg is a function call. Spec from http://devedge.netscape.com/library/manuals/2000/javascript/1.5/reference/string.html#1194258
00296         {
00297           List l;
00298           l.append(String(mstr)); // First arg: complete matched substring
00299           // Then the submatch strings
00300           for ( unsigned int sub = 1; sub <= reg->subPatterns() ; ++sub )
00301             l.append( String( s.substr((*ovector)[2*sub],
00302                                (*ovector)[2*sub+1]-(*ovector)[2*sub]) ) );
00303           l.append(Number(pos)); // The offset within the string where the match occurred
00304           l.append(String(s)); // Last arg: the string itself. Can't see the difference with the 1st arg!
00305           Object thisObj = exec->interpreter()->globalObject();
00306           rstr = o1.call( exec, thisObj, l ).toString(exec);
00307         }
00308         lastIndex = pos + rstr.size();
00309         s = s.substr(0, pos) + rstr + s.substr(pos + len);
00310         //fprintf(stderr,"pos=%d,len=%d,lastIndex=%d,u=%s\n",pos,len,lastIndex,u.ascii());
00311       } while (global);
00312 
00313       result = String(s);
00314     } else { // First arg is a string
00315       u2 = a0.toString(exec);
00316       pos = s.find(u2);
00317       len = u2.size();
00318       // Do the replacement
00319       if (pos == -1)
00320         result = String(s);
00321       else {
00322         u3 = s.substr(0, pos) + a1.toString(exec) +
00323              s.substr(pos + len);
00324         result = String(u3);
00325       }
00326     }
00327     break;
00328   case Slice: // http://developer.netscape.com/docs/manuals/js/client/jsref/string.htm#1194366 or 15.5.4.13
00329     {
00330         // The arg processing is very much like ArrayProtoFunc::Slice
00331         // We return a new array
00332         result = exec->interpreter()->builtinArray().construct(exec,List::empty());
00333         int begin = args[0].toUInt32(exec);
00334         int end = len;
00335         if (args[1].type() != UndefinedType)
00336         {
00337           end = args[1].toUInt32(exec);
00338           if ( end < 0 )
00339             end += len;
00340         }
00341         // safety tests
00342         if ( begin < 0 || end < 0 || begin >= end ) {
00343             result = String();
00344             break;
00345         }
00346         //printf( "Slicing from %d to %d \n", begin, end );
00347         result = String(s.substr(begin, end-begin));
00348         break;
00349     }
00350     case Split: { // 15.5.4.14
00351     Object constructor = exec->interpreter()->builtinArray();
00352     Object res = Object::dynamicCast(constructor.construct(exec,List::empty()));
00353     result = res;
00354     i = p0 = 0;
00355     d = (a1.type() != UndefinedType) ? a1.toInteger(exec) : -1; // optional max number
00356     if (a0.type() == ObjectType && Object::dynamicCast(a0).inherits(&RegExpImp::info)) {
00357       Object obj0 = Object::dynamicCast(a0);
00358       RegExp reg(obj0.get(exec,"source").toString(exec));
00359       if (s.isEmpty() && !reg.match(s, 0).isNull()) {
00360     // empty string matched by regexp -> empty array
00361     res.put(exec, "length", Number(0), DontDelete|ReadOnly|DontEnum);
00362     break;
00363       }
00364       pos = 0;
00365       while (pos < s.size()) {
00366     // TODO: back references
00367         int mpos;
00368         int *ovector = 0L;
00369     UString mstr = reg.match(s, pos, &mpos, &ovector);
00370         delete [] ovector; ovector = 0L;
00371     if (mpos < 0)
00372       break;
00373     pos = mpos + (mstr.isEmpty() ? 1 : mstr.size());
00374     if (mpos != p0 || !mstr.isEmpty()) {
00375       res.put(exec,UString::from(i), String(s.substr(p0, mpos-p0)));
00376       p0 = mpos + mstr.size();
00377       i++;
00378     }
00379       }
00380     } else if (a0.type() != UndefinedType) {
00381       u2 = a0.toString(exec);
00382       if (u2.isEmpty()) {
00383     if (s.isEmpty()) {
00384       // empty separator matches empty string -> empty array
00385       put(exec,"length", Number(0));
00386       break;
00387     } else {
00388       while (i != d && i < s.size()-1)
00389         res.put(exec,UString::from(i++), String(s.substr(p0++, 1)));
00390     }
00391       } else {
00392     while (i != d && (pos = s.find(u2, p0)) >= 0) {
00393       res.put(exec,UString::from(i), String(s.substr(p0, pos-p0)));
00394       p0 = pos + u2.size();
00395       i++;
00396     }
00397       }
00398     }
00399     // add remaining string, if any
00400     if (i != d)
00401       res.put(exec,UString::from(i++), String(s.substr(p0)));
00402     res.put(exec,"length", Number(i));
00403     }
00404     break;
00405   case Substr: {
00406     n = a0.toInteger(exec);
00407     m = a1.toInteger(exec);
00408     int d, d2;
00409     if (n >= 0)
00410       d = n;
00411     else
00412       d = maxInt(len + n, 0);
00413     if (a1.type() == UndefinedType)
00414       d2 = len - d;
00415     else
00416       d2 = minInt(maxInt(m, 0), len - d);
00417     result = String(s.substr(d, d2));
00418     break;
00419   }
00420   case Substring: {
00421     double start = a0.toNumber(exec);
00422     double end = a1.toNumber(exec);
00423     if (KJS::isNaN(start))
00424       start = 0;
00425     if (KJS::isNaN(end))
00426       end = 0;
00427     if (start < 0)
00428       start = 0;
00429     if (end < 0)
00430       end = 0;
00431     if (start > len)
00432       start = len;
00433     if (end > len)
00434       end = len;
00435     if (a1.type() == UndefinedType)
00436       end = len;
00437     if (start > end) {
00438       double temp = end;
00439       end = start;
00440       start = temp;
00441     }
00442     result = String(s.substr((int)start, (int)end-(int)start));
00443     }
00444     break;
00445   case ToLowerCase:
00446     for (i = 0; i < len; i++)
00447       s[i] = s[i].toLower();
00448     result = String(s);
00449     break;
00450   case ToUpperCase:
00451     for (i = 0; i < len; i++)
00452       s[i] = s[i].toUpper();
00453     result = String(s);
00454     break;
00455 #ifndef KJS_PURE_ECMA
00456   case Big:
00457     result = String("<BIG>" + s + "</BIG>");
00458     break;
00459   case Small:
00460     result = String("<SMALL>" + s + "</SMALL>");
00461     break;
00462   case Blink:
00463     result = String("<BLINK>" + s + "</BLINK>");
00464     break;
00465   case Bold:
00466     result = String("<B>" + s + "</B>");
00467     break;
00468   case Fixed:
00469     result = String("<TT>" + s + "</TT>");
00470     break;
00471   case Italics:
00472     result = String("<I>" + s + "</I>");
00473     break;
00474   case Strike:
00475     result = String("<STRIKE>" + s + "</STRIKE>");
00476     break;
00477   case Sub:
00478     result = String("<SUB>" + s + "</SUB>");
00479     break;
00480   case Sup:
00481     result = String("<SUP>" + s + "</SUP>");
00482     break;
00483   case Fontcolor:
00484     result = String("<FONT COLOR=" + a0.toString(exec) + ">"
00485             + s + "</FONT>");
00486     break;
00487   case Fontsize:
00488     result = String("<FONT SIZE=" + a0.toString(exec) + ">"
00489             + s + "</FONT>");
00490     break;
00491   case Anchor:
00492     result = String("<a name=" + a0.toString(exec) + ">"
00493             + s + "</a>");
00494     break;
00495   case Link:
00496     result = String("<a href=" + a0.toString(exec) + ">"
00497             + s + "</a>");
00498     break;
00499 #endif
00500   }
00501 
00502   return result;
00503 }
00504 
00505 // ------------------------------ StringObjectImp ------------------------------
00506 
00507 StringObjectImp::StringObjectImp(ExecState *exec,
00508                                  FunctionPrototypeImp *funcProto,
00509                                  StringPrototypeImp *stringProto)
00510   : InternalFunctionImp(funcProto)
00511 {
00512   Value protect(this);
00513   // ECMA 15.5.3.1 String.prototype
00514   put(exec,"prototype", Object(stringProto), DontEnum|DontDelete|ReadOnly);
00515 
00516   put(exec,"fromCharCode", Object(new StringObjectFuncImp(exec,funcProto)), DontEnum);
00517 
00518   // no. of arguments for constructor
00519   put(exec,"length", Number(1), ReadOnly|DontDelete|DontEnum);
00520 }
00521 
00522 
00523 bool StringObjectImp::implementsConstruct() const
00524 {
00525   return true;
00526 }
00527 
00528 // ECMA 15.5.2
00529 Object StringObjectImp::construct(ExecState *exec, const List &args)
00530 {
00531   Object proto = exec->interpreter()->builtinStringPrototype();
00532   Object obj(new StringInstanceImp(proto ));
00533 
00534   UString s;
00535   if (args.size() > 0)
00536     s = args.begin()->toString(exec);
00537   else
00538     s = UString("");
00539 
00540   obj.setInternalValue(String(s));
00541   obj.put(exec, "length", Number(s.size()), ReadOnly|DontEnum|DontDelete);
00542 
00543   return obj;
00544 }
00545 
00546 bool StringObjectImp::implementsCall() const
00547 {
00548   return true;
00549 }
00550 
00551 // ECMA 15.5.1
00552 Value StringObjectImp::call(ExecState *exec, Object &/*thisObj*/, const List &args)
00553 {
00554   if (args.isEmpty())
00555     return String("");
00556   else {
00557     Value v = args[0];
00558     return String(v.toString(exec));
00559   }
00560 }
00561 
00562 // ------------------------------ StringObjectFuncImp --------------------------
00563 
00564 // ECMA 15.5.3.2 fromCharCode()
00565 StringObjectFuncImp::StringObjectFuncImp(ExecState *exec, FunctionPrototypeImp *funcProto)
00566   : InternalFunctionImp(funcProto)
00567 {
00568   Value protect(this);
00569   put(exec,"length",Number(1),DontDelete|ReadOnly|DontEnum);
00570 }
00571 
00572 bool StringObjectFuncImp::implementsCall() const
00573 {
00574   return true;
00575 }
00576 
00577 Value StringObjectFuncImp::call(ExecState *exec, Object &/*thisObj*/, const List &args)
00578 {
00579   UString s;
00580   if (args.size()) {
00581     UChar *buf = new UChar[args.size()];
00582     UChar *p = buf;
00583     ListIterator it = args.begin();
00584     while (it != args.end()) {
00585       unsigned short u = it->toUInt16(exec);
00586       *p++ = UChar(u);
00587       it++;
00588     }
00589     s = UString(buf, args.size(), false);
00590   } else
00591     s = "";
00592 
00593   return String(s);
00594 }
KDE Logo
This file is part of the documentation for kdelibs Version 3.1.4.
Documentation copyright © 1996-2002 the KDE developers.
Generated on Sun Feb 27 22:15:18 2005 by doxygen 1.3.4 written by Dimitri van Heesch, © 1997-2001