blob: c3b67b343401b5d043ef492b9dd59cd74658c266 [file] [log] [blame]
Alex Stancuf1d5c912020-11-02 17:34:59 +02001/*************************************************************************
2*
3* Licensed under the Apache License, Version 2.0 (the "License");
4* you may not use this file except in compliance with the License.
5* You may obtain a copy of the License at
6*
7* http://www.apache.org/licenses/LICENSE-2.0
8*
9* Unless required by applicable law or agreed to in writing, software
10* distributed under the License is distributed on an "AS IS" BASIS,
11* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12* See the License for the specific language governing permissions and
13* limitations under the License.
14***************************************************************************/
15
16#ifndef DOZERG_REGXSTRING_IMPL_H_20091012
17#define DOZERG_REGXSTRING_IMPL_H_20091012
18
19#include <string>
20#include <vector>
21#include <iosfwd>
22#include <utility>
23#include <memory>
24
// Debug switch; 0 disables it.
// NOTE(review): nothing in this header tests _DZ_DEBUG; presumably the
// implementation file does - confirm before changing the value.
#define _DZ_DEBUG 0

// Leak-tracking switch; 0 disables it. When non-zero, __NodeBase gains a
// static counter `ref` that its default constructor increments.
#define _MEM_LEAK 0
28
//allocator choice
// __DZ_ALLOC names the allocator template used by every container macro
// below. Only GCC builds with NDEBUG defined use the libstdc++ pool
// allocator; every other configuration uses std::allocator.
#ifndef __GNUC__
#   define __DZ_ALLOC std::allocator
#else
#   ifndef NDEBUG
#       define __DZ_ALLOC std::allocator
#   else
#       include <ext/pool_allocator.h>
#       define __DZ_ALLOC __gnu_cxx::__pool_alloc
#   endif
#endif

//stl containers redefine
// Each macro expands to a standard (or GNU extension) container type that
// uses __DZ_ALLOC as its allocator. The macros only spell the type: this
// header includes <string>, <vector>, <iosfwd>, <utility> and <memory>, so
// the header of any other container must be included by the code that
// actually uses the corresponding macro.
    //Sequence
#define __DZ_BASIC_STRING(C)            std::basic_string< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRING1(C,T)         std::basic_string< C,T,__DZ_ALLOC< C > >
#define __DZ_STRING                     __DZ_BASIC_STRING(char)
#define __DZ_WSTRING                    __DZ_BASIC_STRING(wchar_t)
#define __DZ_DEQUE(T)                   std::deque< T,__DZ_ALLOC< T > >
#define __DZ_LIST(T)                    std::list< T,__DZ_ALLOC< T > >
#define __DZ_VECTOR(T)                  std::vector< T,__DZ_ALLOC< T > >
    //Associative
// The allocator of a map/multimap has to allocate the container's
// value_type, which is std::pair<const K,V>. The const is written after K
// ("K const") so that a pointer key such as `char *` becomes a const
// pointer rather than a pointer to const.
#define __DZ_MAP(K,V)                   std::map< K,V,std::less< K >,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_MAP1(K,V,C)                std::map< K,V,C,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_MULTIMAP(K,V)              std::multimap< K,V,std::less< K >,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_MULTIMAP1(K,V,C)           std::multimap< K,V,C,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_SET(K)                     std::set< K,std::less< K >,__DZ_ALLOC< K > >
#define __DZ_SET1(K,C)                  std::set< K,C,__DZ_ALLOC< K > >
#define __DZ_MULTISET(K)                std::multiset< K,std::less< K >,__DZ_ALLOC< K > >
#define __DZ_MULTISET1(K,C)             std::multiset< K,C,__DZ_ALLOC< K > >
    //String Stream
#define __DZ_BASIC_ISTRINGSTREAM(C)     std::basic_istringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_ISTRINGSTREAM1(C,T)  std::basic_istringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_BASIC_OSTRINGSTREAM(C)     std::basic_ostringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_OSTRINGSTREAM1(C,T)  std::basic_ostringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGSTREAM(C)      std::basic_stringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGSTREAM1(C,T)   std::basic_stringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_ISTRINGSTREAM              __DZ_BASIC_ISTRINGSTREAM(char)
#define __DZ_OSTRINGSTREAM              __DZ_BASIC_OSTRINGSTREAM(char)
#define __DZ_STRINGSTREAM               __DZ_BASIC_STRINGSTREAM(char)
#define __DZ_WISTRINGSTREAM             __DZ_BASIC_ISTRINGSTREAM(wchar_t)
#define __DZ_WOSTRINGSTREAM             __DZ_BASIC_OSTRINGSTREAM(wchar_t)
#define __DZ_WSTRINGSTREAM              __DZ_BASIC_STRINGSTREAM(wchar_t)
    //Stream Buf
#define __DZ_BASIC_STRINGBUF(C)         std::basic_stringbuf< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGBUF1(C,T)      std::basic_stringbuf< C,T,__DZ_ALLOC< C > >
#define __DZ_STRINGBUF                  __DZ_BASIC_STRINGBUF(char)
#define __DZ_WSTRINGBUF                 __DZ_BASIC_STRINGBUF(wchar_t)
    //Extension
#define __DZ_ROPE(T)                    __gnu_cxx::rope< T,__DZ_ALLOC< T > >
#define __DZ_SLIST(T)                   __gnu_cxx::slist< T,__DZ_ALLOC< T > >
80
// Name of the namespace that holds every implementation type of this header.
#define REGXSTRING_NS   __DZ_Regx_String

// Open / close the REGXSTRING_NS namespace. The macro names keep their
// historical spelling ("NAMESAPCE") because existing code refers to them
// under exactly these names.
#define NAMESAPCE_BEGIN namespace REGXSTRING_NS{
#define NAMESAPCE_END   }
85
// Forward declaration only: Config is declared at global scope (outside
// REGXSTRING_NS) and is used in this header purely by reference or pointer,
// so its definition is not needed here.
struct Config;
87
88NAMESAPCE_BEGIN
89
90typedef std::pair<size_t,size_t> __RefValue;
91
92typedef __DZ_VECTOR(__RefValue) __Refs;
93
94typedef __DZ_VECTOR(char) __Ends;
95
96struct __ParseData{
97 __Ends ends_;
98 const Config & config_;
99 size_t i_;
100 int ref_;
101 //functions:
102 explicit __ParseData(const Config & config)
103 : config_(config)
104 , i_(0)
105 , ref_(0)
106 {}
107 int inEnds(int ch) const;
108};
109
110struct __GenerateData
111{
112 __Refs refs_;
113 __DZ_OSTRINGSTREAM & oss_;
114 explicit __GenerateData(__DZ_OSTRINGSTREAM & oss)
115 : oss_(oss)
116 {}
117};
118
119struct __NodeBase
120{
121 static __NodeBase * const REP_NULL; //replace with NULL(0)
122#if _MEM_LEAK
123 static int ref;
124 __NodeBase(){++ref;}
125#endif
126 virtual ~__NodeBase();
127 virtual __NodeBase * Optimize(__ParseData & pdata) = 0;
128 virtual void RandString(__GenerateData & gdata) const = 0;
129 virtual void Debug(std::ostream & out,int lvl) const = 0;
130 virtual int Repeat(int ch);
131 virtual void AppendNode(__NodeBase * node);
132};
133
134class __Edge : public __NodeBase
135{
136 bool begin_;
137public:
138 explicit __Edge(int ch);
139 __NodeBase * Optimize(__ParseData & pdata);
140 void RandString(__GenerateData & gdata) const;
141 void Debug(std::ostream & out,int lvl) const;
142};
143
144class __Text : public __NodeBase
145{
146 __DZ_STRING str_;
147public:
148 //functions
149 explicit __Text(int ch);
150 __NodeBase * Optimize(__ParseData & pdata);
151 void RandString(__GenerateData & gdata) const;
152 void Debug(std::ostream & out,int lvl) const;
153 __Text & operator +=(const __Text & other){str_ += other.str_;return *this;}
154};
155
156class __Charset : public __NodeBase
157{
158 __DZ_STRING str_;
159 size_t inc_;
160public:
161 //functions
162 __Charset();
163 __Charset(const __DZ_STRING & str,bool include);
164 __NodeBase * Optimize(__ParseData & pdata);
165 void RandString(__GenerateData & gdata) const;
166 void Debug(std::ostream & out,int lvl) const;
167 void Exclude();
168 void AddChar(int ch);
169 void AddRange(int from,int to);
170 void AddRange(__Charset * node);
171 void Unique();
172private:
173 void unite(__Charset & node);
174 void reverse();
175 void unique();
176};
177
178struct __Repeat : public __NodeBase
179{
180 static const int INFINITE = 1 << 16;
181private:
182 static const int _REPEAT_MAX = __Repeat::INFINITE - 1;
183 static const int _NON_GREEDY = 1 << 17;
184 static const int _PROSSESSIVE = 1 << 18;
185 static const int _CLEAR_FLAGS = _NON_GREEDY - 1;
186 __NodeBase * node_;
187 int min_,max_;
188public:
189 //functions
190 __Repeat(__NodeBase * node,int ch);
191 __Repeat(__NodeBase * node,int min,int max);
192 ~__Repeat();
193 __NodeBase * Optimize(__ParseData & pdata);
194 void RandString(__GenerateData & gdata) const;
195 void Debug(std::ostream & out,int lvl) const;
196 int Repeat(int ch);
197private:
198 bool isInfinite() const{return (max_ & INFINITE) != 0;}
199 bool isNonGreedy() const{return (min_ & _NON_GREEDY) != 0;}
200 bool isPossessive() const{return (min_ & _PROSSESSIVE) != 0;}
201 bool canRepeat() const{return !(min_ & (_NON_GREEDY | _PROSSESSIVE));}
202};
203
204class __Seq : public __NodeBase
205{
206 typedef __DZ_VECTOR(__NodeBase *) __Con;
207 __Con seq_;
208public:
209 //functions
210 explicit __Seq(__NodeBase * node);
211 ~__Seq();
212 __NodeBase * Optimize(__ParseData & pdata);
213 void RandString(__GenerateData & gdata) const;
214 void Debug(std::ostream & out,int lvl) const;
215 void AppendNode(__NodeBase * node);
216};
217
218class __Group : public __NodeBase
219{
220 static const int INDEX = 1 << 16; //group index flag
221 static const size_t MAX_GROUPS = 9;
222 __NodeBase * node_;
223 size_t mark_;
224public:
225 //functions
226 __Group(__NodeBase * node,int mark);
227 ~__Group();
228 __NodeBase * Optimize(__ParseData & pdata);
229 void RandString(__GenerateData & gdata) const;
230 void Debug(std::ostream & out,int lvl) const;
231};
232
233class __Select : public __NodeBase
234{
235 typedef __DZ_VECTOR(__NodeBase *) __Con;
236 __Con sel_;
237 size_t sz_;
238public:
239 //functions
240 explicit __Select(__NodeBase * node);
241 ~__Select();
242 __NodeBase * Optimize(__ParseData & pdata);
243 void RandString(__GenerateData & gdata) const;
244 void Debug(std::ostream & out,int lvl) const;
245 void AppendNode(__NodeBase * node);
246};
247
248class __Ref : public __NodeBase
249{
250 size_t index_;
251public:
252 explicit __Ref(int index);
253 __NodeBase * Optimize(__ParseData & pdata);
254 void RandString(__GenerateData & gdata) const;
255 void Debug(std::ostream & out,int lvl) const;
256};
257
258class __CRegxString
259{
260 typedef std::pair<__NodeBase *,int> __Ret;
261public:
262 __CRegxString();
263 ~__CRegxString(){uninit();}
264 void ParseRegx(const __DZ_STRING & regx,const Config * config);
265 __DZ_STRING Regx() const{return regx_;}
266 const __DZ_STRING & RandString();
267 const __DZ_STRING & LastString() const{return str_;}
268 void Debug(std::ostream & out) const;
269private:
270 __CRegxString(const __CRegxString &);
271 __CRegxString & operator =(const __CRegxString &);
272 void uninit();
273 __Ret processSeq(__ParseData & pdata);
274 __Ret processSlash(bool bNode,__ParseData & pdata);
275 __NodeBase * processSet(__ParseData & pdata);
276 __NodeBase * processGroup(__ParseData & pdata);
277 __Ret processSelect(__NodeBase * node,__ParseData & pdata);
278 __NodeBase * processRepeat(__NodeBase * node,__ParseData & pdata);
279 int processInt(int & result,__ParseData & pdata);
280 bool processRange(int & result,__ParseData & pdata);
281 int ignoreSubexpMarks(__ParseData & pdata);
282 //fields:
283 __DZ_STRING regx_;
284 __DZ_STRING str_;
285 __NodeBase * top_; //regx tree
286};
287
288NAMESAPCE_END
289
290#endif