Blame - regexp.c - codeaurora/busybox

blob: 02b1fe505acc4757e8c1e01ebbad0e0ddf1a0608 [file] [log] [blame]

Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	1	/* regexp.c */
				2
				3	#include "internal.h"
				4	#include "regexp.h"
				5	#include <setjmp.h>
				6	#include <stdio.h>
				7	#include <ctype.h>
				8
				9
Eric Andersen	b186d98	1999-12-03 09:19:54 +0000	[diff] [blame]	10	#if ( defined BB_GREP \|\| defined BB_SED)
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	11
				12	/* This also tries to find a needle in a haystack, but uses
				13	* real regular expressions.... The fake regular expression
				14	* version of find_match lives in utility.c. Using this version
				15	* will add 3.9k to busybox...
				16	* -Erik Andersen
				17	*/
				18	extern int find_match(char haystack, char needle, int ignoreCase)
				19	{
				20	int status;
				21	struct regexp* re;
				22	re = regcomp( needle);
				23	status = regexec(re, haystack, FALSE, ignoreCase);
				24	free( re);
				25	return( status);
				26	}
				27
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	28	#if defined BB_SED
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	29	/* This performs substitutions after a regexp match has been found.
				30	* The new string is returned. It is malloc'ed, and do must be freed. */
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	31	extern int replace_match(char haystack, char needle, char *newNeedle, int ignoreCase)
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	32	{
				33	int status;
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	34	struct regexp* re;
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	35	char s, buf[BUF_SIZE], d = buf;
				36
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	37	re = regcomp( needle);
				38	status = regexec(re, haystack, FALSE, ignoreCase);
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	39	if (status==TRUE) {
				40	s=haystack;
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	41
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	42	do {
				43	/* copy stuff from before the match */
				44	while (s < re->startp[0])
				45	d++ = s++;
				46	/* substitute for the matched part */
				47	regsub(re, newNeedle, d);
				48	s = re->endp[0];
				49	d += strlen(d);
				50	} while (regexec(re, s, FALSE, ignoreCase) == TRUE);
				51	/* copy stuff from after the match */
				52	while ( (d++ = s++) ) {}
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	53	d[0] = '\0';
				54	strcpy(haystack, buf);
				55	}
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	56	free( re);
				57	return( status);
				58	}
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	59	#endif
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	60
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	61
				62	/* code swiped from elvis-tiny 1.4 (a clone of vi) and adjusted to
				63	* suit the needs of busybox by Erik Andersen.
				64	*
				65	* From the README:
				66	* "Elvis is freely redistributable, in either source form or executable form.
				67	* There are no restrictions on how you may use it".
				68	* Elvis was written by Steve Kirkendall <kirkenda@cs.pdx.edu>
				69	*
				70	*
				71	* This file contains the code that compiles regular expressions and executes
				72	* them. It supports the same syntax and features as vi's regular expression
				73	* code. Specifically, the meta characters are:
				74	* ^ matches the beginning of a line
				75	* $ matches the end of a line
				76	* \< matches the beginning of a word
				77	* \> matches the end of a word
				78	* . matches any single character
				79	* [] matches any character in a character class
				80	* \( delimits the start of a subexpression
				81	* \) delimits the end of a subexpression
				82	* * repeats the preceding 0 or more times
				83	* NOTE: You cannot follow a \) with a *.
				84	*
				85	* The physical structure of a compiled RE is as follows:
				86	* - First, there is a one-byte value that says how many character classes
				87	* are used in this regular expression
				88	* - Next, each character class is stored as a bitmap that is 256 bits
				89	* (32 bytes) long.
				90	* - A mixture of literal characters and compiled meta characters follows.
				91	* This begins with M_BEGIN(0) and ends with M_END(0). All meta chars
				92	* are stored as a \n followed by a one-byte code, so they take up two
				93	* bytes apiece. Literal characters take up one byte apiece. \n can't
				94	* be used as a literal character.
				95	*
				96	*/
				97
				98
				99
				100	static char previous; / the previous regexp, used when null regexp is given */
				101	static char previous1; / a copy of the text from the previous substitution for regsub()*/
				102
				103
				104	/* These are used to classify or recognize meta-characters */
				105	#define META '\0'
				106	#define BASE_META(m) ((m) - 256)
				107	#define INT_META(c) ((c) + 256)
				108	#define IS_META(m) ((m) >= 256)
				109	#define IS_CLASS(m) ((m) >= M_CLASS(0) && (m) <= M_CLASS(9))
				110	#define IS_START(m) ((m) >= M_START(0) && (m) <= M_START(9))
				111	#define IS_END(m) ((m) >= M_END(0) && (m) <= M_END(9))
				112	#define IS_CLOSURE(m) ((m) >= M_SPLAT && (m) <= M_QMARK)
				113	#define ADD_META(s,m) ((s)++ = META, (s)++ = BASE_META(m))
				114	#define GET_META(s) ((s) == META ? INT_META(++(s)) : *s)
				115
				116	/* These are the internal codes used for each type of meta-character */
				117	#define M_BEGLINE 256 /* internal code for ^ */
				118	#define M_ENDLINE 257 /* internal code for $ */
				119	#define M_BEGWORD 258 /* internal code for \< */
				120	#define M_ENDWORD 259 /* internal code for \> */
				121	#define M_ANY 260 /* internal code for . */
				122	#define M_SPLAT 261 /* internal code for * */
				123	#define M_PLUS 262 /* internal code for \+ */
				124	#define M_QMARK 263 /* internal code for \? */
				125	#define M_CLASS(n) (264+(n)) /* internal code for [] */
				126	#define M_START(n) (274+(n)) /* internal code for \( */
				127	#define M_END(n) (284+(n)) /* internal code for \) */
				128
				129	/* These are used during compilation */
				130	static int class_cnt; /* used to assign class IDs */
				131	static int start_cnt; /* used to assign start IDs */
				132	static int end_stk[NSUBEXP];/* used to assign end IDs */
				133	static int end_sp;
				134	static char retext; / points to the text being compiled */
				135
				136	/* error-handling stuff */
				137	jmp_buf errorhandler;
Eric Andersen	b6a44b8	1999-11-13 04:47:09 +0000	[diff] [blame]	138	#define FAIL(why) do {fprintf(stderr, why); longjmp(errorhandler, 1);} while (0)
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	139
				140
				141
				142
				143	/* This function builds a bitmap for a particular class */
				144	/* text -- start of the class */
				145	/* bmap -- the bitmap */
				146	static char makeclass(char text, char* bmap)
				147	{
				148	int i;
				149	int complement = 0;
				150
				151
				152	/* zero the bitmap */
				153	for (i = 0; bmap && i < 32; i++)
				154	{
				155	bmap[i] = 0;
				156	}
				157
				158	/* see if we're going to complement this class */
				159	if (*text == '^')
				160	{
				161	text++;
				162	complement = 1;
				163	}
				164
				165	/* add in the characters */
				166	while (text && text != ']')
				167	{
				168	/* is this a span of characters? */
				169	if (text[1] == '-' && text[2])
				170	{
				171	/* spans can't be backwards */
				172	if (text[0] > text[2])
				173	{
				174	FAIL("Backwards span in []");
				175	}
				176
				177	/* add each character in the span to the bitmap */
				178	for (i = text[0]; bmap && i <= text[2]; i++)
				179	{
				180	bmap[i >> 3] \|= (1 << (i & 7));
				181	}
				182
				183	/* move past this span */
				184	text += 3;
				185	}
				186	else
				187	{
				188	/* add this single character to the span */
				189	i = *text++;
				190	if (bmap)
				191	{
				192	bmap[i >> 3] \|= (1 << (i & 7));
				193	}
				194	}
				195	}
				196
				197	/* make sure the closing ] is missing */
				198	if (*text++ != ']')
				199	{
				200	FAIL("] missing");
				201	}
				202
				203	/* if we're supposed to complement this class, then do so */
				204	if (complement && bmap)
				205	{
				206	for (i = 0; i < 32; i++)
				207	{
				208	bmap[i] = ~bmap[i];
				209	}
				210	}
				211
				212	return text;
				213	}
				214
				215
				216
				217
				218	/* This function gets the next character or meta character from a string.
				219	* The pointer is incremented by 1, or by 2 for \-quoted characters. For [],
				220	* a bitmap is generated via makeclass() (if re is given), and the
				221	* character-class text is skipped.
				222	*/
				223	static int gettoken(sptr, re)
				224	char **sptr;
				225	regexp *re;
				226	{
				227	int c;
				228
				229	c = **sptr;
				230	++*sptr;
				231	if (c == '\\')
				232	{
				233	c = **sptr;
				234	++*sptr;
				235	switch (c)
				236	{
				237	case '<':
				238	return M_BEGWORD;
				239
				240	case '>':
				241	return M_ENDWORD;
				242
				243	case '(':
				244	if (start_cnt >= NSUBEXP)
				245	{
				246	FAIL("Too many \\(s");
				247	}
				248	end_stk[end_sp++] = start_cnt;
				249	return M_START(start_cnt++);
				250
				251	case ')':
				252	if (end_sp <= 0)
				253	{
				254	FAIL("Mismatched \\)");
				255	}
				256	return M_END(end_stk[--end_sp]);
				257
				258	case '*':
				259	return M_SPLAT;
				260
				261	case '.':
				262	return M_ANY;
				263
				264	case '+':
				265	return M_PLUS;
				266
				267	case '?':
				268	return M_QMARK;
				269
				270	default:
				271	return c;
				272	}
				273	}
				274	else {
				275	switch (c)
				276	{
				277	case '^':
				278	if (*sptr == retext + 1)
				279	{
				280	return M_BEGLINE;
				281	}
				282	return c;
				283
				284	case '$':
				285	if (!**sptr)
				286	{
				287	return M_ENDLINE;
				288	}
				289	return c;
				290
				291	case '.':
				292	return M_ANY;
				293
				294	case '*':
				295	return M_SPLAT;
				296
				297	case '[':
				298	/* make sure we don't have too many classes */
				299	if (class_cnt >= 10)
				300	{
				301	FAIL("Too many []s");
				302	}
				303
				304	/* process the character list for this class */
				305	if (re)
				306	{
				307	/* generate the bitmap for this class */
				308	sptr = makeclass(sptr, re->program + 1 + 32 * class_cnt);
				309	}
				310	else
				311	{
				312	/* skip to end of the class */
				313	sptr = makeclass(sptr, (char *)0);
				314	}
				315	return M_CLASS(class_cnt++);
				316
				317	default:
				318	return c;
				319	}
				320	}
				321	/NOTREACHED/
				322	}
				323
				324
				325
				326
				327	/* This function calculates the number of bytes that will be needed for a
				328	* compiled RE. Its argument is the uncompiled version. It is not clever
				329	* about catching syntax errors; that is done in a later pass.
				330	*/
				331	static unsigned calcsize(text)
				332	char *text;
				333	{
				334	unsigned size;
				335	int token;
				336
				337	retext = text;
				338	class_cnt = 0;
				339	start_cnt = 1;
				340	end_sp = 0;
				341	size = 5;
				342	while ((token = gettoken(&text, (regexp *)0)) != 0)
				343	{
				344	if (IS_CLASS(token))
				345	{
				346	size += 34;
				347	}
				348	else if (IS_META(token))
				349	{
				350	size += 2;
				351	}
				352	else
				353	{
				354	size++;
				355	}
				356	}
				357
				358	return size;
				359	}
				360
				361
				362
				363	/---------------------------------------------------------------------------/
				364
				365
				366	/* This function checks for a match between a character and a token which is
				367	* known to represent a single character. It returns 0 if they match, or
				368	* 1 if they don't.
				369	*/
				370	static int match1(regexp* re, char ch, int token, int ignoreCase)
				371	{
				372	if (!ch)
				373	{
				374	/* the end of a line can't match any RE of width 1 */
				375	return 1;
				376	}
				377	if (token == M_ANY)
				378	{
				379	return 0;
				380	}
				381	else if (IS_CLASS(token))
				382	{
				383	if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7)))
				384	return 0;
				385	}
				386	else if (ch == token
				387	\|\| (ignoreCase==TRUE && isupper(ch) && tolower(ch) == token))
				388	{
				389	return 0;
				390	}
				391	return 1;
				392	}
				393
				394
				395
				396	/* This function checks characters up to and including the next closure, at
				397	* which point it does a recursive call to check the rest of it. This function
				398	* returns 0 if everything matches, or 1 if something doesn't match.
				399	*/
				400	/* re -- the regular expression */
				401	/* str -- the string */
				402	/* prog -- a portion of re->program, an compiled RE */
				403	/* here -- a portion of str, the string to compare it to */
				404	static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase)
				405	{
				406	int token;
				407	int nmatched;
				408	int closure;
				409
				410	for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog))
				411	{
				412	switch (token)
				413	{
				414	/case M_BEGLINE: can't happen; re->bol is used instead /
				415	case M_ENDLINE:
				416	if (*here)
				417	return 1;
				418	break;
				419
				420	case M_BEGWORD:
				421	if (here != str &&
				422	(here[-1] == '_' \|\|
				423	(isascii(here[-1]) && isalnum(here[-1]))))
				424	return 1;
				425	break;
				426
				427	case M_ENDWORD:
				428	if ((here[0] == '_' \|\| isascii(here[0])) && isalnum(here[0]))
				429	return 1;
				430	break;
				431
				432	case M_START(0):
				433	case M_START(1):
				434	case M_START(2):
				435	case M_START(3):
				436	case M_START(4):
				437	case M_START(5):
				438	case M_START(6):
				439	case M_START(7):
				440	case M_START(8):
				441	case M_START(9):
				442	re->startp[token - M_START(0)] = (char *)here;
				443	break;
				444
				445	case M_END(0):
				446	case M_END(1):
				447	case M_END(2):
				448	case M_END(3):
				449	case M_END(4):
				450	case M_END(5):
				451	case M_END(6):
				452	case M_END(7):
				453	case M_END(8):
				454	case M_END(9):
				455	re->endp[token - M_END(0)] = (char *)here;
				456	if (token == M_END(0))
				457	{
				458	return 0;
				459	}
				460	break;
				461
				462	default: /* literal, M_CLASS(n), or M_ANY */
				463	if (match1(re, *here, token, ignoreCase) != 0)
				464	return 1;
				465	here++;
				466	}
				467	}
				468
				469	/* C L O S U R E */
				470
				471	/* step 1: see what we have to match against, and move "prog" to point
				472	* the the remainder of the compiled RE.
				473	*/
				474	closure = token;
				475	prog++, token = GET_META(prog);
				476	prog++;
				477
				478	/* step 2: see how many times we can match that token against the string */
				479	for (nmatched = 0;
				480	(closure != M_QMARK \|\| nmatched < 1) && here && match1(re, here, token, ignoreCase) == 0;
				481	nmatched++, here++)
				482	{
				483	}
				484
				485	/* step 3: try to match the remainder, and back off if it doesn't */
				486	while (nmatched >= 0 && match(re, str, prog, here, ignoreCase) != 0)
				487	{
				488	nmatched--;
				489	here--;
				490	}
				491
				492	/* so how did it work out? */
				493	if (nmatched >= ((closure == M_PLUS) ? 1 : 0))
				494	return 0;
				495	return 1;
				496	}
				497
				498
				499	/* This function compiles a regexp. */
				500	extern regexp regcomp(char text)
				501	{
				502	int needfirst;
				503	unsigned size;
				504	int token;
				505	int peek;
				506	char *build;
Eric Andersen	96bcfd3	1999-11-12 01:30:18 +0000	[diff] [blame]	507	regexp *re; // Ignore compiler whining. If we longjmp, we don't use re anymore.
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	508
				509
				510	/* prepare for error handling */
				511	re = (regexp *)0;
				512	if (setjmp(errorhandler))
				513	{
				514	if (re)
				515	{
				516	free(re);
				517	}
				518	return (regexp *)0;
				519	}
				520
				521	/* if an empty regexp string was given, use the previous one */
				522	if (*text == 0)
				523	{
				524	if (!previous)
				525	{
				526	FAIL("No previous RE");
				527	}
				528	text = previous;
				529	}
				530	else /* non-empty regexp given, so remember it */
				531	{
				532	if (previous)
				533	free(previous);
				534	previous = (char *)malloc((unsigned)(strlen(text) + 1));
				535	if (previous)
				536	strcpy(previous, text);
				537	}
				538
				539	/* allocate memory */
				540	class_cnt = 0;
				541	start_cnt = 1;
				542	end_sp = 0;
				543	retext = text;
				544	size = calcsize(text) + sizeof(regexp);
				545	re = (regexp *)malloc((unsigned)size);
				546
				547	if (!re)
				548	{
				549	FAIL("Not enough memory for this RE");
				550	}
				551
				552	/* compile it */
				553	build = &re->program[1 + 32 * class_cnt];
				554	re->program[0] = class_cnt;
				555	for (token = 0; token < NSUBEXP; token++)
				556	{
				557	re->startp[token] = re->endp[token] = (char *)0;
				558	}
				559	re->first = 0;
				560	re->bol = 0;
				561	re->minlen = 0;
				562	needfirst = 1;
				563	class_cnt = 0;
				564	start_cnt = 1;
				565	end_sp = 0;
				566	retext = text;
				567	for (token = M_START(0), peek = gettoken(&text, re);
				568	token;
				569	token = peek, peek = gettoken(&text, re))
				570	{
				571	/* special processing for the closure operator */
				572	if (IS_CLOSURE(peek))
				573	{
				574	/* detect misuse of closure operator */
				575	if (IS_START(token))
				576	{
				577	FAIL("* or \\+ or \\? follows nothing");
				578	}
				579	else if (IS_META(token) && token != M_ANY && !IS_CLASS(token))
				580	{
				581	FAIL("* or \\+ or \\? can only follow a normal character or . or []");
				582	}
				583
				584	/* it is okay -- make it prefix instead of postfix */
				585	ADD_META(build, peek);
				586
				587	/* take care of "needfirst" - is this the first char? */
				588	if (needfirst && peek == M_PLUS && !IS_META(token))
				589	{
				590	re->first = token;
				591	}
				592	needfirst = 0;
				593
				594	/* we used "peek" -- need to refill it */
				595	peek = gettoken(&text, re);
				596	if (IS_CLOSURE(peek))
				597	{
				598	FAIL("* or \\+ or \\? doubled up");
				599	}
				600	}
				601	else if (!IS_META(token))
				602	{
				603	/* normal char is NOT argument of closure */
				604	if (needfirst)
				605	{
				606	re->first = token;
				607	needfirst = 0;
				608	}
				609	re->minlen++;
				610	}
				611	else if (token == M_ANY \|\| IS_CLASS(token))
				612	{
				613	/* . or [] is NOT argument of closure */
				614	needfirst = 0;
				615	re->minlen++;
				616	}
				617
				618	/* the "token" character is not closure -- process it normally */
				619	if (token == M_BEGLINE)
				620	{
				621	/* set the BOL flag instead of storing M_BEGLINE */
				622	re->bol = 1;
				623	}
				624	else if (IS_META(token))
				625	{
				626	ADD_META(build, token);
				627	}
				628	else
				629	{
				630	*build++ = token;
				631	}
				632	}
				633
				634	/* end it with a \) which MUST MATCH the opening \( */
				635	ADD_META(build, M_END(0));
				636	if (end_sp > 0)
				637	{
				638	FAIL("Not enough \\)s");
				639	}
				640
				641	return re;
				642	}
				643
				644
				645
				646
				647	/* This function searches through a string for text that matches an RE. */
				648	/* re -- the compiled regexp to search for */
				649	/* str -- the string to search through */
				650	/* bol -- does str start at the beginning of a line? (boolean) */
				651	/* ignoreCase -- ignoreCase or not */
				652	extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase)
				653	{
				654	char prog; / the entry point of re->program */
				655	int len; /* length of the string */
				656	char *here;
				657
				658	/* if must start at the beginning of a line, and this isn't, then fail */
				659	if (re->bol && bol==TRUE)
				660	{
				661	return FALSE;
				662	}
				663
				664	len = strlen(str);
				665	prog = re->program + 1 + 32 * re->program[0];
				666
				667	/* search for the RE in the string */
				668	if (re->bol)
				669	{
				670	/* must occur at BOL */
				671	if ((re->first
				672	&& match1(re, (char )str, re->first, ignoreCase))/* wrong first letter? */
				673	\|\| len < re->minlen /* not long enough? */
				674	\|\| match(re, (char )str, prog, str, ignoreCase)) / doesn't match? */
				675	return FALSE; /* THEN FAIL! */
				676	}
				677	else if (ignoreCase == FALSE)
				678	{
				679	/* can occur anywhere in the line, noignorecase */
				680	for (here = (char *)str;
				681	(re->first && re->first != *here)
				682	\|\| match(re, (char *)str, prog, here, ignoreCase);
				683	here++, len--)
				684	{
				685	if (len < re->minlen)
				686	return FALSE;
				687	}
				688	}
				689	else
				690	{
				691	/* can occur anywhere in the line, ignorecase */
				692	for (here = (char *)str;
				693	(re->first && match1(re, *here, (int)re->first, ignoreCase))
				694	\|\| match(re, (char *)str, prog, here, ignoreCase);
				695	here++, len--)
				696	{
				697	if (len < re->minlen)
				698	return FALSE;
				699	}
				700	}
				701
				702	/* if we didn't fail, then we must have succeeded */
				703	return TRUE;
				704	}
				705
				706
				707
				708
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	709	#if defined BB_SED
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	710	/* This performs substitutions after a regexp match has been found. */
				711	extern void regsub(regexp* re, char* src, char* dst)
				712	{
				713	char *cpy;
				714	char *end;
				715	char c;
				716	char *start;
				717	int mod;
				718
				719	mod = 0;
				720
				721	start = src;
				722	while ((c = *src++) != '\0')
				723	{
				724	/* recognize any meta characters */
				725	if (c == '&')
				726	{
				727	cpy = re->startp[0];
				728	end = re->endp[0];
				729	}
				730	else if (c == '~')
				731	{
				732	cpy = previous1;
				733	if (cpy)
				734	end = cpy + strlen(cpy);
				735	}
				736	else
				737	if (c == '\\')
				738	{
				739	c = *src++;
				740	switch (c)
				741	{
				742	case '0':
				743	case '1':
				744	case '2':
				745	case '3':
				746	case '4':
				747	case '5':
				748	case '6':
				749	case '7':
				750	case '8':
				751	case '9':
				752	/* \0 thru \9 mean "copy subexpression" */
				753	c -= '0';
				754	cpy = re->startp[(int)c];
				755	end = re->endp[(int)c];
				756	break;
				757	case 'U':
				758	case 'u':
				759	case 'L':
				760	case 'l':
				761	/* \U and \L mean "convert to upper/lowercase" */
				762	mod = c;
				763	continue;
				764
				765	case 'E':
				766	case 'e':
				767	/* \E ends the \U or \L */
				768	mod = 0;
				769	continue;
				770	case '&':
				771	/* "\&" means "original text" */
				772	*dst++ = c;
				773	continue;
				774
				775	case '~':
				776	/* "\~" means "previous text, if any" */
				777	*dst++ = c;
				778	continue;
				779	default:
				780	/* ordinary char preceded by backslash */
				781	*dst++ = c;
				782	continue;
				783	}
				784	}
				785	else
				786	{
				787	/* ordinary character, so just copy it */
				788	*dst++ = c;
				789	continue;
				790	}
				791
				792	/* Note: to reach this point in the code, we must have evaded
				793	* all "continue" statements. To do that, we must have hit
				794	* a metacharacter that involves copying.
				795	*/
				796
				797	/* if there is nothing to copy, loop */
				798	if (!cpy)
				799	continue;
				800
				801	/* copy over a portion of the original */
				802	while (cpy < end)
				803	{
				804	switch (mod)
				805	{
				806	case 'U':
				807	case 'u':
				808	/* convert to uppercase */
				809	if (isascii(cpy) && islower(cpy))
				810	{
				811	dst++ = toupper(cpy);
				812	cpy++;
				813	}
				814	else
				815	{
				816	dst++ = cpy++;
				817	}
				818	break;
				819
				820	case 'L':
				821	case 'l':
				822	/* convert to lowercase */
				823	if (isascii(cpy) && isupper(cpy))
				824	{
				825	dst++ = tolower(cpy);
				826	cpy++;
				827	}
				828	else
				829	{
				830	dst++ = cpy++;
				831	}
				832	break;
				833
				834	default:
				835	/* copy without any conversion */
				836	dst++ = cpy++;
				837	}
				838
				839	/* \u and \l end automatically after the first char */
				840	if (mod && (mod == 'u' \|\| mod == 'l'))
				841	{
				842	mod = 0;
				843	}
				844	}
				845	}
				846	*dst = '\0';
				847
				848	/* remember what text we inserted this time */
				849	if (previous1)
				850	free(previous1);
				851	previous1 = (char *)malloc((unsigned)(strlen(start) + 1));
				852	if (previous1)
				853	strcpy(previous1, start);
				854	}
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	855	#endif
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	856
				857	#endif /* BB_REGEXP */
				858
				859