Alex Stancu | f1d5c91 | 2020-11-02 17:34:59 +0200 | [diff] [blame] | 1 | /*************************************************************************
|
| 2 | *
|
| 3 | * Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 | * you may not use this file except in compliance with the License.
|
| 5 | * You may obtain a copy of the License at
|
| 6 | *
|
| 7 | * http://www.apache.org/licenses/LICENSE-2.0
|
| 8 | *
|
| 9 | * Unless required by applicable law or agreed to in writing, software
|
| 10 | * distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 | * See the License for the specific language governing permissions and
|
| 13 | * limitations under the License.
|
| 14 | ***************************************************************************/
|
| 15 |
|
| 16 | #ifndef DOZERG_REGXSTRING_IMPL_H_20091012
|
| 17 | #define DOZERG_REGXSTRING_IMPL_H_20091012
|
| 18 |
|
| 19 | #include <string>
|
| 20 | #include <vector>
|
| 21 | #include <iosfwd>
|
| 22 | #include <utility>
|
| 23 | #include <memory>
|
| 24 |
|
// Build-time diagnostic switches (0 = disabled, non-zero = enabled).

// NOTE(review): not referenced anywhere in this header; presumably consumed
// by the implementation file to enable debug output -- confirm.
#define _DZ_DEBUG   0

// When non-zero, __NodeBase gains a static instance counter `ref` and a
// default constructor that increments it.
#define _MEM_LEAK   0
|
| 28 |
|
//allocator choice
// __DZ_ALLOC names the allocator *template* plugged into every __DZ_* alias
// below. It is std::allocator everywhere except builds where the compiler
// defines __GNUC__ and NDEBUG is defined; those use the libstdc++ extension
// __gnu_cxx::__pool_alloc.
#ifndef __GNUC__
#   define __DZ_ALLOC   std::allocator
#else
#   ifndef NDEBUG
#       define __DZ_ALLOC   std::allocator
#   else
#       include <ext/pool_allocator.h>
#       define __DZ_ALLOC   __gnu_cxx::__pool_alloc
#   endif
#endif

//stl containers redefine
// Each macro expands to a standard (or GNU extension) container type that is
// parameterized with __DZ_ALLOC. The macros only spell the type: whoever
// expands one must include the matching header (<map>, <set>, <deque>,
// <list>, <sstream>, <ext/rope>, <ext/slist>, ...) -- this file itself only
// includes <string>, <vector>, <iosfwd>, <utility> and <memory>.
// Suffix "1" variants take one extra template argument (traits or comparator).
//Sequence
#define __DZ_BASIC_STRING(C)            std::basic_string< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRING1(C,T)         std::basic_string< C,T,__DZ_ALLOC< C > >
#define __DZ_STRING                     __DZ_BASIC_STRING(char)
#define __DZ_WSTRING                    __DZ_BASIC_STRING(wchar_t)
#define __DZ_DEQUE(T)                   std::deque< T,__DZ_ALLOC< T > >
#define __DZ_LIST(T)                    std::list< T,__DZ_ALLOC< T > >
#define __DZ_VECTOR(T)                  std::vector< T,__DZ_ALLOC< T > >
//Associative
// The allocator of a (multi)map must allocate the container's value_type,
// which is std::pair<const Key,T>. Passing std::pair<Key,T> is rejected by a
// static_assert in current standard libraries when compiling as strict
// C++11 or later. The const is written *after* K ("K const") so that a
// pointer key such as `char *` becomes `char * const` and not `const char *`.
#define __DZ_MAP(K,V)                   std::map< K,V,std::less< K >,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_MAP1(K,V,C)                std::map< K,V,C,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_MULTIMAP(K,V)              std::multimap< K,V,std::less< K >,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_MULTIMAP1(K,V,C)           std::multimap< K,V,C,__DZ_ALLOC<std::pair< K const,V > > >
#define __DZ_SET(K)                     std::set< K,std::less< K >,__DZ_ALLOC< K > >
#define __DZ_SET1(K,C)                  std::set< K,C,__DZ_ALLOC< K > >
#define __DZ_MULTISET(K)                std::multiset< K,std::less< K >,__DZ_ALLOC< K > >
#define __DZ_MULTISET1(K,C)             std::multiset< K,C,__DZ_ALLOC< K > >
//String Stream
#define __DZ_BASIC_ISTRINGSTREAM(C)     std::basic_istringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_ISTRINGSTREAM1(C,T)  std::basic_istringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_BASIC_OSTRINGSTREAM(C)     std::basic_ostringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_OSTRINGSTREAM1(C,T)  std::basic_ostringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGSTREAM(C)      std::basic_stringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGSTREAM1(C,T)   std::basic_stringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_ISTRINGSTREAM              __DZ_BASIC_ISTRINGSTREAM(char)
#define __DZ_OSTRINGSTREAM              __DZ_BASIC_OSTRINGSTREAM(char)
#define __DZ_STRINGSTREAM               __DZ_BASIC_STRINGSTREAM(char)
#define __DZ_WISTRINGSTREAM             __DZ_BASIC_ISTRINGSTREAM(wchar_t)
#define __DZ_WOSTRINGSTREAM             __DZ_BASIC_OSTRINGSTREAM(wchar_t)
#define __DZ_WSTRINGSTREAM              __DZ_BASIC_STRINGSTREAM(wchar_t)
//Stream Buf
#define __DZ_BASIC_STRINGBUF(C)         std::basic_stringbuf< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGBUF1(C,T)      std::basic_stringbuf< C,T,__DZ_ALLOC< C > >
#define __DZ_STRINGBUF                  __DZ_BASIC_STRINGBUF(char)
#define __DZ_WSTRINGBUF                 __DZ_BASIC_STRINGBUF(wchar_t)
//Extension
#define __DZ_ROPE(T)                    __gnu_cxx::rope< T,__DZ_ALLOC< T > >
#define __DZ_SLIST(T)                   __gnu_cxx::slist< T,__DZ_ALLOC< T > >
|
| 80 |
|
// Name of the namespace that wraps everything declared in this header.
#define REGXSTRING_NS       __DZ_Regx_String

// Open / close that namespace. NAMESAPCE_BEGIN expands through REGXSTRING_NS
// so the namespace name is spelled in exactly one place.
// (The "NAMESAPCE" spelling is kept on purpose: the macro names are part of
// this header's interface.)
#define NAMESAPCE_BEGIN     namespace REGXSTRING_NS{
#define NAMESAPCE_END       }
|
| 85 |
|
| 86 | struct Config;
|
| 87 |
|
| 88 | NAMESAPCE_BEGIN
|
| 89 |
|
| 90 | typedef std::pair<size_t,size_t> __RefValue;
|
| 91 |
|
| 92 | typedef __DZ_VECTOR(__RefValue) __Refs;
|
| 93 |
|
| 94 | typedef __DZ_VECTOR(char) __Ends;
|
| 95 |
|
| 96 | struct __ParseData{
|
| 97 | __Ends ends_;
|
| 98 | const Config & config_;
|
| 99 | size_t i_;
|
| 100 | int ref_;
|
| 101 | //functions:
|
| 102 | explicit __ParseData(const Config & config)
|
| 103 | : config_(config)
|
| 104 | , i_(0)
|
| 105 | , ref_(0)
|
| 106 | {}
|
| 107 | int inEnds(int ch) const;
|
| 108 | };
|
| 109 |
|
| 110 | struct __GenerateData
|
| 111 | {
|
| 112 | __Refs refs_;
|
| 113 | __DZ_OSTRINGSTREAM & oss_;
|
| 114 | explicit __GenerateData(__DZ_OSTRINGSTREAM & oss)
|
| 115 | : oss_(oss)
|
| 116 | {}
|
| 117 | };
|
| 118 |
|
| 119 | struct __NodeBase
|
| 120 | {
|
| 121 | static __NodeBase * const REP_NULL; //replace with NULL(0)
|
| 122 | #if _MEM_LEAK
|
| 123 | static int ref;
|
| 124 | __NodeBase(){++ref;}
|
| 125 | #endif
|
| 126 | virtual ~__NodeBase();
|
| 127 | virtual __NodeBase * Optimize(__ParseData & pdata) = 0;
|
| 128 | virtual void RandString(__GenerateData & gdata) const = 0;
|
| 129 | virtual void Debug(std::ostream & out,int lvl) const = 0;
|
| 130 | virtual int Repeat(int ch);
|
| 131 | virtual void AppendNode(__NodeBase * node);
|
| 132 | };
|
| 133 |
|
| 134 | class __Edge : public __NodeBase
|
| 135 | {
|
| 136 | bool begin_;
|
| 137 | public:
|
| 138 | explicit __Edge(int ch);
|
| 139 | __NodeBase * Optimize(__ParseData & pdata);
|
| 140 | void RandString(__GenerateData & gdata) const;
|
| 141 | void Debug(std::ostream & out,int lvl) const;
|
| 142 | };
|
| 143 |
|
| 144 | class __Text : public __NodeBase
|
| 145 | {
|
| 146 | __DZ_STRING str_;
|
| 147 | public:
|
| 148 | //functions
|
| 149 | explicit __Text(int ch);
|
| 150 | __NodeBase * Optimize(__ParseData & pdata);
|
| 151 | void RandString(__GenerateData & gdata) const;
|
| 152 | void Debug(std::ostream & out,int lvl) const;
|
| 153 | __Text & operator +=(const __Text & other){str_ += other.str_;return *this;}
|
| 154 | };
|
| 155 |
|
| 156 | class __Charset : public __NodeBase
|
| 157 | {
|
| 158 | __DZ_STRING str_;
|
| 159 | size_t inc_;
|
| 160 | public:
|
| 161 | //functions
|
| 162 | __Charset();
|
| 163 | __Charset(const __DZ_STRING & str,bool include);
|
| 164 | __NodeBase * Optimize(__ParseData & pdata);
|
| 165 | void RandString(__GenerateData & gdata) const;
|
| 166 | void Debug(std::ostream & out,int lvl) const;
|
| 167 | void Exclude();
|
| 168 | void AddChar(int ch);
|
| 169 | void AddRange(int from,int to);
|
| 170 | void AddRange(__Charset * node);
|
| 171 | void Unique();
|
| 172 | private:
|
| 173 | void unite(__Charset & node);
|
| 174 | void reverse();
|
| 175 | void unique();
|
| 176 | };
|
| 177 |
|
| 178 | struct __Repeat : public __NodeBase
|
| 179 | {
|
| 180 | static const int INFINITE = 1 << 16;
|
| 181 | private:
|
| 182 | static const int _REPEAT_MAX = __Repeat::INFINITE - 1;
|
| 183 | static const int _NON_GREEDY = 1 << 17;
|
| 184 | static const int _PROSSESSIVE = 1 << 18;
|
| 185 | static const int _CLEAR_FLAGS = _NON_GREEDY - 1;
|
| 186 | __NodeBase * node_;
|
| 187 | int min_,max_;
|
| 188 | public:
|
| 189 | //functions
|
| 190 | __Repeat(__NodeBase * node,int ch);
|
| 191 | __Repeat(__NodeBase * node,int min,int max);
|
| 192 | ~__Repeat();
|
| 193 | __NodeBase * Optimize(__ParseData & pdata);
|
| 194 | void RandString(__GenerateData & gdata) const;
|
| 195 | void Debug(std::ostream & out,int lvl) const;
|
| 196 | int Repeat(int ch);
|
| 197 | private:
|
| 198 | bool isInfinite() const{return (max_ & INFINITE) != 0;}
|
| 199 | bool isNonGreedy() const{return (min_ & _NON_GREEDY) != 0;}
|
| 200 | bool isPossessive() const{return (min_ & _PROSSESSIVE) != 0;}
|
| 201 | bool canRepeat() const{return !(min_ & (_NON_GREEDY | _PROSSESSIVE));}
|
| 202 | };
|
| 203 |
|
| 204 | class __Seq : public __NodeBase
|
| 205 | {
|
| 206 | typedef __DZ_VECTOR(__NodeBase *) __Con;
|
| 207 | __Con seq_;
|
| 208 | public:
|
| 209 | //functions
|
| 210 | explicit __Seq(__NodeBase * node);
|
| 211 | ~__Seq();
|
| 212 | __NodeBase * Optimize(__ParseData & pdata);
|
| 213 | void RandString(__GenerateData & gdata) const;
|
| 214 | void Debug(std::ostream & out,int lvl) const;
|
| 215 | void AppendNode(__NodeBase * node);
|
| 216 | };
|
| 217 |
|
| 218 | class __Group : public __NodeBase
|
| 219 | {
|
| 220 | static const int INDEX = 1 << 16; //group index flag
|
| 221 | static const size_t MAX_GROUPS = 9;
|
| 222 | __NodeBase * node_;
|
| 223 | size_t mark_;
|
| 224 | public:
|
| 225 | //functions
|
| 226 | __Group(__NodeBase * node,int mark);
|
| 227 | ~__Group();
|
| 228 | __NodeBase * Optimize(__ParseData & pdata);
|
| 229 | void RandString(__GenerateData & gdata) const;
|
| 230 | void Debug(std::ostream & out,int lvl) const;
|
| 231 | };
|
| 232 |
|
| 233 | class __Select : public __NodeBase
|
| 234 | {
|
| 235 | typedef __DZ_VECTOR(__NodeBase *) __Con;
|
| 236 | __Con sel_;
|
| 237 | size_t sz_;
|
| 238 | public:
|
| 239 | //functions
|
| 240 | explicit __Select(__NodeBase * node);
|
| 241 | ~__Select();
|
| 242 | __NodeBase * Optimize(__ParseData & pdata);
|
| 243 | void RandString(__GenerateData & gdata) const;
|
| 244 | void Debug(std::ostream & out,int lvl) const;
|
| 245 | void AppendNode(__NodeBase * node);
|
| 246 | };
|
| 247 |
|
| 248 | class __Ref : public __NodeBase
|
| 249 | {
|
| 250 | size_t index_;
|
| 251 | public:
|
| 252 | explicit __Ref(int index);
|
| 253 | __NodeBase * Optimize(__ParseData & pdata);
|
| 254 | void RandString(__GenerateData & gdata) const;
|
| 255 | void Debug(std::ostream & out,int lvl) const;
|
| 256 | };
|
| 257 |
|
| 258 | class __CRegxString
|
| 259 | {
|
| 260 | typedef std::pair<__NodeBase *,int> __Ret;
|
| 261 | public:
|
| 262 | __CRegxString();
|
| 263 | ~__CRegxString(){uninit();}
|
| 264 | void ParseRegx(const __DZ_STRING & regx,const Config * config);
|
| 265 | __DZ_STRING Regx() const{return regx_;}
|
| 266 | const __DZ_STRING & RandString();
|
| 267 | const __DZ_STRING & LastString() const{return str_;}
|
| 268 | void Debug(std::ostream & out) const;
|
| 269 | private:
|
| 270 | __CRegxString(const __CRegxString &);
|
| 271 | __CRegxString & operator =(const __CRegxString &);
|
| 272 | void uninit();
|
| 273 | __Ret processSeq(__ParseData & pdata);
|
| 274 | __Ret processSlash(bool bNode,__ParseData & pdata);
|
| 275 | __NodeBase * processSet(__ParseData & pdata);
|
| 276 | __NodeBase * processGroup(__ParseData & pdata);
|
| 277 | __Ret processSelect(__NodeBase * node,__ParseData & pdata);
|
| 278 | __NodeBase * processRepeat(__NodeBase * node,__ParseData & pdata);
|
| 279 | int processInt(int & result,__ParseData & pdata);
|
| 280 | bool processRange(int & result,__ParseData & pdata);
|
| 281 | int ignoreSubexpMarks(__ParseData & pdata);
|
| 282 | //fields:
|
| 283 | __DZ_STRING regx_;
|
| 284 | __DZ_STRING str_;
|
| 285 | __NodeBase * top_; //regx tree
|
| 286 | };
|
| 287 |
|
| 288 | NAMESAPCE_END
|
| 289 |
|
| 290 | #endif
|