blob: c3b67b343401b5d043ef492b9dd59cd74658c266 [file] [log] [blame]
/*************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
#ifndef DOZERG_REGXSTRING_IMPL_H_20091012
#define DOZERG_REGXSTRING_IMPL_H_20091012
#include <string>
#include <vector>
#include <iosfwd>
#include <utility>
#include <memory>
#define _DZ_DEBUG 0
#define _MEM_LEAK 0
//allocator choice
#ifndef __GNUC__
# define __DZ_ALLOC std::allocator
#else
# ifndef NDEBUG
# define __DZ_ALLOC std::allocator
# else
# include <ext/pool_allocator.h>
# define __DZ_ALLOC __gnu_cxx::__pool_alloc
# endif
#endif
//stl containers redefine
//Sequence
#define __DZ_BASIC_STRING(C) std::basic_string< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRING1(C,T) std::basic_string< C,T,__DZ_ALLOC< C > >
#define __DZ_STRING __DZ_BASIC_STRING(char)
#define __DZ_WSTRING __DZ_BASIC_STRING(wchar_t)
#define __DZ_DEQUE(T) std::deque< T,__DZ_ALLOC< T > >
#define __DZ_LIST(T) std::list< T,__DZ_ALLOC< T > >
#define __DZ_VECTOR(T) std::vector< T,__DZ_ALLOC< T > >
//Associative
#define __DZ_MAP(K,V) std::map< K,V,std::less< K >,__DZ_ALLOC<std::pair< K,V > > >
#define __DZ_MAP1(K,V,C) std::map< K,V,C,__DZ_ALLOC<std::pair< K,V > > >
#define __DZ_MULTIMAP(K,V) std::multimap< K,V,std::less< K >,__DZ_ALLOC<std::pair< K,V > > >
#define __DZ_MULTIMAP1(K,V,C) std::multimap< K,V,C,__DZ_ALLOC<std::pair< K,V > > >
#define __DZ_SET(K) std::set< K,std::less< K >,__DZ_ALLOC< K > >
#define __DZ_SET1(K,C) std::set< K,C,__DZ_ALLOC< K > >
#define __DZ_MULTISET(K) std::multiset< K,std::less< K >,__DZ_ALLOC< K > >
#define __DZ_MULTISET1(K,C) std::multiset< K,C,__DZ_ALLOC< K > >
//String Stream
#define __DZ_BASIC_ISTRINGSTREAM(C) std::basic_istringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_ISTRINGSTREAM1(C,T) std::basic_istringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_BASIC_OSTRINGSTREAM(C) std::basic_ostringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_OSTRINGSTREAM1(C,T) std::basic_ostringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGSTREAM(C) std::basic_stringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGSTREAM1(C,T) std::basic_stringstream< C,T,__DZ_ALLOC< C > >
#define __DZ_ISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(char)
#define __DZ_OSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(char)
#define __DZ_STRINGSTREAM __DZ_BASIC_STRINGSTREAM(char)
#define __DZ_WISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(wchar_t)
#define __DZ_WOSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(wchar_t)
#define __DZ_WSTRINGSTREAM __DZ_BASIC_STRINGSTREAM(wchar_t)
//Stream Buf
#define __DZ_BASIC_STRINGBUF(C) std::basic_stringbuf< C,std::char_traits< C >,__DZ_ALLOC< C > >
#define __DZ_BASIC_STRINGBUF1(C,T) std::basic_stringbuf< C,T,__DZ_ALLOC< C > >
#define __DZ_STRINGBUF __DZ_BASIC_STRINGBUF(char)
#define __DZ_WSTRINGBUF __DZ_BASIC_STRINGBUF(wchar_t)
//Extension
#define __DZ_ROPE(T) __gnu_cxx::rope< T,__DZ_ALLOC< T > >
#define __DZ_SLIST(T) __gnu_cxx::slist< T,__DZ_ALLOC< T > >
#define REGXSTRING_NS __DZ_Regx_String
#define NAMESAPCE_BEGIN namespace __DZ_Regx_String{
#define NAMESAPCE_END }
struct Config;
NAMESAPCE_BEGIN
typedef std::pair<size_t,size_t> __RefValue;
typedef __DZ_VECTOR(__RefValue) __Refs;
typedef __DZ_VECTOR(char) __Ends;
struct __ParseData{
__Ends ends_;
const Config & config_;
size_t i_;
int ref_;
//functions:
explicit __ParseData(const Config & config)
: config_(config)
, i_(0)
, ref_(0)
{}
int inEnds(int ch) const;
};
struct __GenerateData
{
__Refs refs_;
__DZ_OSTRINGSTREAM & oss_;
explicit __GenerateData(__DZ_OSTRINGSTREAM & oss)
: oss_(oss)
{}
};
struct __NodeBase
{
static __NodeBase * const REP_NULL; //replace with NULL(0)
#if _MEM_LEAK
static int ref;
__NodeBase(){++ref;}
#endif
virtual ~__NodeBase();
virtual __NodeBase * Optimize(__ParseData & pdata) = 0;
virtual void RandString(__GenerateData & gdata) const = 0;
virtual void Debug(std::ostream & out,int lvl) const = 0;
virtual int Repeat(int ch);
virtual void AppendNode(__NodeBase * node);
};
class __Edge : public __NodeBase
{
bool begin_;
public:
explicit __Edge(int ch);
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
};
class __Text : public __NodeBase
{
__DZ_STRING str_;
public:
//functions
explicit __Text(int ch);
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
__Text & operator +=(const __Text & other){str_ += other.str_;return *this;}
};
class __Charset : public __NodeBase
{
__DZ_STRING str_;
size_t inc_;
public:
//functions
__Charset();
__Charset(const __DZ_STRING & str,bool include);
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
void Exclude();
void AddChar(int ch);
void AddRange(int from,int to);
void AddRange(__Charset * node);
void Unique();
private:
void unite(__Charset & node);
void reverse();
void unique();
};
struct __Repeat : public __NodeBase
{
static const int INFINITE = 1 << 16;
private:
static const int _REPEAT_MAX = __Repeat::INFINITE - 1;
static const int _NON_GREEDY = 1 << 17;
static const int _PROSSESSIVE = 1 << 18;
static const int _CLEAR_FLAGS = _NON_GREEDY - 1;
__NodeBase * node_;
int min_,max_;
public:
//functions
__Repeat(__NodeBase * node,int ch);
__Repeat(__NodeBase * node,int min,int max);
~__Repeat();
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
int Repeat(int ch);
private:
bool isInfinite() const{return (max_ & INFINITE) != 0;}
bool isNonGreedy() const{return (min_ & _NON_GREEDY) != 0;}
bool isPossessive() const{return (min_ & _PROSSESSIVE) != 0;}
bool canRepeat() const{return !(min_ & (_NON_GREEDY | _PROSSESSIVE));}
};
class __Seq : public __NodeBase
{
typedef __DZ_VECTOR(__NodeBase *) __Con;
__Con seq_;
public:
//functions
explicit __Seq(__NodeBase * node);
~__Seq();
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
void AppendNode(__NodeBase * node);
};
class __Group : public __NodeBase
{
static const int INDEX = 1 << 16; //group index flag
static const size_t MAX_GROUPS = 9;
__NodeBase * node_;
size_t mark_;
public:
//functions
__Group(__NodeBase * node,int mark);
~__Group();
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
};
class __Select : public __NodeBase
{
typedef __DZ_VECTOR(__NodeBase *) __Con;
__Con sel_;
size_t sz_;
public:
//functions
explicit __Select(__NodeBase * node);
~__Select();
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
void AppendNode(__NodeBase * node);
};
class __Ref : public __NodeBase
{
size_t index_;
public:
explicit __Ref(int index);
__NodeBase * Optimize(__ParseData & pdata);
void RandString(__GenerateData & gdata) const;
void Debug(std::ostream & out,int lvl) const;
};
class __CRegxString
{
typedef std::pair<__NodeBase *,int> __Ret;
public:
__CRegxString();
~__CRegxString(){uninit();}
void ParseRegx(const __DZ_STRING & regx,const Config * config);
__DZ_STRING Regx() const{return regx_;}
const __DZ_STRING & RandString();
const __DZ_STRING & LastString() const{return str_;}
void Debug(std::ostream & out) const;
private:
__CRegxString(const __CRegxString &);
__CRegxString & operator =(const __CRegxString &);
void uninit();
__Ret processSeq(__ParseData & pdata);
__Ret processSlash(bool bNode,__ParseData & pdata);
__NodeBase * processSet(__ParseData & pdata);
__NodeBase * processGroup(__ParseData & pdata);
__Ret processSelect(__NodeBase * node,__ParseData & pdata);
__NodeBase * processRepeat(__NodeBase * node,__ParseData & pdata);
int processInt(int & result,__ParseData & pdata);
bool processRange(int & result,__ParseData & pdata);
int ignoreSubexpMarks(__ParseData & pdata);
//fields:
__DZ_STRING regx_;
__DZ_STRING str_;
__NodeBase * top_; //regx tree
};
NAMESAPCE_END
#endif