presage 0.9.2~beta
tokenizer.cpp
Go to the documentation of this file.
1
2/******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25#include "tokenizer.h"
26
28 std::istream& is,
29 const std::string blankspaces,
30 const std::string separators
31)
32 : stream(is),
33 lowercase(false)
34{
35 // this should be changed to deal with a !good() stream
36 // appropriately
37 //assert(stream.good());
38
39 offset = stream.tellg();
40 sstate = stream.rdstate();
41
43
44 stream.seekg(0, std::ios::end);
45 offend = stream.tellg();
46 stream.seekg(0, std::ios::beg);
47 offbeg = stream.tellg();
48
51}
52
54{
55 // reset stream state to enable repeatability
56 // (see reverseTokenizerTest::testRepeatability())
57 stream.setstate(sstate);
58 stream.clear();
59}
60
61void Tokenizer::blankspaceChars(const std::string chars)
62{
63 blankspaces = chars;
64}
65
66std::string Tokenizer::blankspaceChars() const
67{
68 return blankspaces;
69}
70
71void Tokenizer::separatorChars(const std::string chars)
72{
73 separators = chars;
74}
75
76std::string Tokenizer::separatorChars() const
77{
78 return separators;
79}
80
81void Tokenizer::lowercaseMode(const bool value)
82{
83 lowercase = value;
84}
85
87{
88 return lowercase;
89}
90
91bool Tokenizer::isBlankspace(const int character) const
92{
93 std::string::size_type ret = blankspaces.find(character);
94 if (ret == std::string::npos) {
95 return false;
96 } else {
97 return true;
98 }
99}
100
101bool Tokenizer::isSeparator(const int character) const
102{
103 std::string::size_type ret = separators.find(character);
104 if (ret == std::string::npos) {
105 return false;
106 } else {
107 return true;
108 }
109}
std::istream & stream
Definition: tokenizer.h:144
std::string separatorChars() const
Definition: tokenizer.cpp:76
std::streamoff offend
Definition: tokenizer.h:147
std::streamoff offbeg
Definition: tokenizer.h:146
virtual ~Tokenizer()
Definition: tokenizer.cpp:53
bool isSeparator(const int character) const
Definition: tokenizer.cpp:101
std::string blankspaces
Definition: tokenizer.h:154
std::streamoff offset
Definition: tokenizer.h:148
bool lowercase
Definition: tokenizer.h:157
bool lowercaseMode() const
Definition: tokenizer.cpp:86
std::ios::iostate sstate
Definition: tokenizer.h:145
std::string separators
Definition: tokenizer.h:155
bool isBlankspace(const int character) const
Definition: tokenizer.cpp:91
Tokenizer(std::istream &stream, const std::string blankspaces, const std::string separators)
Definition: tokenizer.cpp:27
std::string blankspaceChars() const
Definition: tokenizer.cpp:66