htmltokenizer.h
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef HTMLTOKENIZER_H
00030 #define HTMLTOKENIZER_H
00031
00032 #include <qstring.h>
00033 #include <qobject.h>
00034 #include <qptrqueue.h>
00035
00036 #include "misc/loader_client.h"
00037 #include "misc/htmltags.h"
00038 #include "misc/stringit.h"
00039 #include "xml/dom_stringimpl.h"
00040 #include "xml/xml_tokenizer.h"
00041 #include "xml/dom_elementimpl.h"
00042 #include "xml/dom_docimpl.h"
00043
00044 class KCharsets;
00045 class KHTMLView;
00046
00047 namespace DOM {
00048 class DocumentPtr;
00049 class DocumentFragmentImpl;
00050 }
00051
00052 namespace khtml {
00053 class CachedScript;
00054 class KHTMLParser;
00055
00062 class Token
00063 {
00064 public:
00065 Token() {
00066 id = 0;
00067 attrs = 0;
00068 text = 0;
00069 flat = false;
00070
00071 }
00072 ~Token() {
00073 if(attrs) attrs->deref();
00074 if(text) text->deref();
00075 }
00076 void addAttribute(DocumentImpl* doc, QChar* buffer, const QString& attrName, const DOMString& v)
00077 {
00078 AttributeImpl* a = 0;
00079 if(buffer->unicode())
00080 a = new AttributeImpl(buffer->unicode(), v.implementation());
00081 else if ( !attrName.isEmpty() && attrName != "/" )
00082 a = new AttributeImpl(doc->attrId(0, DOMString(attrName).implementation(), false, 0),
00083 v.implementation());
00084
00085 if (a) {
00086 if(!attrs) {
00087 attrs = new DOM::NamedAttrMapImpl(0);
00088 attrs->ref();
00089 }
00090 attrs->insertAttribute(a);
00091 }
00092 }
00093 void reset()
00094 {
00095 if(attrs) {
00096 attrs->deref();
00097 attrs = 0;
00098 }
00099 id = 0;
00100 if(text) {
00101 text->deref();
00102 text = 0;
00103 }
00104 flat = false;
00105 }
00106 DOM::NamedAttrMapImpl* attrs;
00107 DOMStringImpl* text;
00108 ushort id;
00109 bool flat;
00110 };
00111
00112
// Tab width constant. NOTE(review): no use is visible in this header;
// presumably consumed by the tokenizer implementation for tab expansion — confirm.
00113 #define TAB_SIZE 8
00114
00115
00116
00117 class HTMLTokenizer : public Tokenizer, public CachedObjectClient
00118 {
00119 public:
00120 HTMLTokenizer(DOM::DocumentPtr *, KHTMLView * = 0);
00121 HTMLTokenizer(DOM::DocumentPtr *, DOM::DocumentFragmentImpl *frag);
00122 virtual ~HTMLTokenizer();
00123
00124 void begin();
00125 void write( const QString &str, bool appendData );
00126 void end();
00127 void finish();
00128 virtual void setOnHold(bool _onHold);
00129
00130 protected:
00131 void reset();
00132 void addPending();
00133 void processToken();
00134 void processListing(khtml::DOMStringIt list);
00135
00136 void parseComment(khtml::DOMStringIt &str);
00137 void parseServer(khtml::DOMStringIt &str);
00138 void parseText(khtml::DOMStringIt &str);
00139 void parseListing(khtml::DOMStringIt &str);
00140 void parseSpecial(khtml::DOMStringIt &str);
00141 void parseTag(khtml::DOMStringIt &str);
00142 void parseEntity(khtml::DOMStringIt &str, QChar *&dest, bool start = false);
00143 void parseProcessingInstruction(khtml::DOMStringIt &str);
00144 void scriptHandler();
00145 void scriptExecution(const QString& script, QString scriptURL = QString(),
00146 int baseLine = 0);
00147 void setSrc(const QString& source);
00148
00149
00150
00151 inline void checkBuffer(int len = 10)
00152 {
00153 if ( (dest - buffer) > size-len )
00154 enlargeBuffer(len);
00155 }
00156 inline void checkScriptBuffer(int len = 10)
00157 {
00158 if ( scriptCodeSize + len >= scriptCodeMaxSize )
00159 enlargeScriptBuffer(len);
00160 }
00161
00162 void enlargeBuffer(int len);
00163 void enlargeScriptBuffer(int len);
00164
00165
00166 void notifyFinished(khtml::CachedObject *finishedObj);
00167 protected:
00168
00170 QChar *buffer;
00171 QChar *dest;
00172
00173 khtml::Token currToken;
00174
00175
00176 int size;
00177
00178
00180
00181 enum
00182 {
00183 NoQuote = 0,
00184 SingleQuote,
00185 DoubleQuote
00186 } tquote;
00187
00188 enum
00189 {
00190 NonePending = 0,
00191 SpacePending,
00192 LFPending,
00193 TabPending
00194 } pending;
00195
00196
00197
00198 enum
00199 {
00200 NoneDiscard = 0,
00201 SpaceDiscard,
00202 LFDiscard,
00203 AllDiscard
00204 } discard;
00205
00206
00207 bool skipLF;
00208
00209
00210 bool startTag;
00211
00212
00213
00214 enum {
00215 NoTag = 0,
00216 TagName,
00217 SearchAttribute,
00218 AttributeName,
00219 SearchEqual,
00220 SearchValue,
00221 QuotedValue,
00222 Value,
00223 SearchEnd
00224 } tag;
00225
00226
00227 enum {
00228 NoEntity = 0,
00229 SearchEntity,
00230 NumericSearch,
00231 Hexadecimal,
00232 Decimal,
00233 EntityName,
00234 SearchSemicolon
00235 } Entity;
00236
00237
00238 bool script;
00239
00240 QChar EntityChar;
00241
00242
00243 bool pre;
00244
00245
00246 int prePos;
00247
00248
00249 bool style;
00250
00251
00252 bool select;
00253
00254
00255 bool xmp;
00256
00257
00258 bool title;
00259
00260
00261 bool plaintext;
00262
00263
00264 bool processingInstruction;
00265
00266
00267 bool comment;
00268
00269
00270 bool textarea;
00271
00272
00273 bool escaped;
00274
00275
00276 bool server;
00277
00278 bool brokenServer;
00279
00280 bool brokenScript;
00281
00282
00283 QString attrName;
00284
00285
00286 QChar *scriptCode;
00287
00288 int scriptCodeSize;
00289
00290 int scriptCodeMaxSize;
00291
00292 int scriptCodeResync;
00293
00294
00295 QChar searchBuffer[ 10 ];
00296
00297 int searchCount;
00298
00299 const QChar *searchFor;
00300
00301 const char* searchStopper;
00302
00303 int searchStopperLen;
00304
00305
00306 bool noMoreData;
00307
00308 QString scriptSrc;
00309 QString scriptSrcCharset;
00310 bool javascript;
00311
00312 QString pendingSrc;
00313
00314
00315 int m_executingScript;
00316 QPtrQueue<khtml::CachedScript> cachedScript;
00317
00318 bool onHold;
00319
00320
00321
00322 bool brokenComments;
00323
00324 int lineno;
00325
00326 int scriptStartLineno;
00327 int tagStartLineno;
00328
00329 #define CBUFLEN 14
00330 char cBuffer[CBUFLEN+2];
00331 unsigned int cBufferPos;
00332
00333 QString _src;
00334 khtml::DOMStringIt src;
00335
00336 KCharsets *charsets;
00337 KHTMLParser *parser;
00338
00339 KHTMLView *view;
00340 };
00341
00342 }
00343
00344 #endif // HTMLTOKENIZER
00345
This file is part of the documentation for kdelibs Version 3.1.4.