Blame - regexp.c - codeaurora/busybox

blob: 11b46c72029677cc1788ffa4a742455172a3ae89 [file] [log] [blame]

Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	1	/* regexp.c */
				2
				3	#include "internal.h"
				4	#include "regexp.h"
				5	#include <setjmp.h>
				6	#include <stdio.h>
				7	#include <ctype.h>
				8
				9
Eric Andersen	b186d98	1999-12-03 09:19:54 +0000	[diff] [blame]	10	#if ( defined BB_GREP \|\| defined BB_SED)
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	11
				12	/* This also tries to find a needle in a haystack, but uses
				13	* real regular expressions.... The fake regular expression
				14	* version of find_match lives in utility.c. Using this version
				15	* will add 3.9k to busybox...
				16	* -Erik Andersen
				17	*/
				18	extern int find_match(char haystack, char needle, int ignoreCase)
				19	{
				20	int status;
				21	struct regexp* re;
				22	re = regcomp( needle);
				23	status = regexec(re, haystack, FALSE, ignoreCase);
				24	free( re);
				25	return( status);
				26	}
				27
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	28	#if defined BB_SED
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	29	/* This performs substitutions after a regexp match has been found.
				30	* The new string is returned. It is malloc'ed, and do must be freed. */
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	31	extern int replace_match(char haystack, char needle, char *newNeedle, int ignoreCase)
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	32	{
				33	int status;
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	34	struct regexp* re;
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	35	char s, buf[BUF_SIZE], d = buf;
				36
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	37	re = regcomp( needle);
				38	status = regexec(re, haystack, FALSE, ignoreCase);
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	39	if (status==TRUE) {
				40	s=haystack;
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	41
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	42	do {
				43	/* copy stuff from before the match */
				44	while (s < re->startp[0])
				45	d++ = s++;
				46	/* substitute for the matched part */
				47	regsub(re, newNeedle, d);
				48	s = re->endp[0];
				49	d += strlen(d);
				50	} while (regexec(re, s, FALSE, ignoreCase) == TRUE);
				51	/* copy stuff from after the match */
				52	while ( (d++ = s++) ) {}
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	53	d[0] = '\0';
				54	strcpy(haystack, buf);
				55	}
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	56	free( re);
				57	return( status);
				58	}
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	59	#endif
Eric Andersen	c1525e8	1999-10-29 00:07:31 +0000	[diff] [blame]	60
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	61
				62	/* code swiped from elvis-tiny 1.4 (a clone of vi) and adjusted to
				63	* suit the needs of busybox by Erik Andersen.
				64	*
				65	* From the README:
				66	* "Elvis is freely redistributable, in either source form or executable form.
				67	* There are no restrictions on how you may use it".
				68	* Elvis was written by Steve Kirkendall <kirkenda@cs.pdx.edu>
				69	*
				70	*
				71	* This file contains the code that compiles regular expressions and executes
				72	* them. It supports the same syntax and features as vi's regular expression
				73	* code. Specifically, the meta characters are:
				74	* ^ matches the beginning of a line
				75	* $ matches the end of a line
				76	* \< matches the beginning of a word
				77	* \> matches the end of a word
				78	* . matches any single character
				79	* [] matches any character in a character class
				80	* \( delimits the start of a subexpression
				81	* \) delimits the end of a subexpression
				82	* * repeats the preceding 0 or more times
				83	* NOTE: You cannot follow a \) with a *.
				84	*
				85	* The physical structure of a compiled RE is as follows:
				86	* - First, there is a one-byte value that says how many character classes
				87	* are used in this regular expression
				88	* - Next, each character class is stored as a bitmap that is 256 bits
				89	* (32 bytes) long.
				90	* - A mixture of literal characters and compiled meta characters follows.
				91	* This begins with M_BEGIN(0) and ends with M_END(0). All meta chars
				92	* are stored as a \n followed by a one-byte code, so they take up two
				93	* bytes apiece. Literal characters take up one byte apiece. \n can't
				94	* be used as a literal character.
				95	*
				96	*/
				97
				98
				99
				100	static char previous; / the previous regexp, used when null regexp is given */
Eric Andersen	abc0f4f	1999-12-08 23:19:36 +0000	[diff] [blame]	101	#if defined BB_SED
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	102	static char previous1; / a copy of the text from the previous substitution for regsub()*/
Eric Andersen	abc0f4f	1999-12-08 23:19:36 +0000	[diff] [blame]	103	#endif
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	104
				105
				106	/* These are used to classify or recognize meta-characters */
				107	#define META '\0'
				108	#define BASE_META(m) ((m) - 256)
				109	#define INT_META(c) ((c) + 256)
				110	#define IS_META(m) ((m) >= 256)
				111	#define IS_CLASS(m) ((m) >= M_CLASS(0) && (m) <= M_CLASS(9))
				112	#define IS_START(m) ((m) >= M_START(0) && (m) <= M_START(9))
				113	#define IS_END(m) ((m) >= M_END(0) && (m) <= M_END(9))
				114	#define IS_CLOSURE(m) ((m) >= M_SPLAT && (m) <= M_QMARK)
				115	#define ADD_META(s,m) ((s)++ = META, (s)++ = BASE_META(m))
				116	#define GET_META(s) ((s) == META ? INT_META(++(s)) : *s)
				117
				118	/* These are the internal codes used for each type of meta-character */
				119	#define M_BEGLINE 256 /* internal code for ^ */
				120	#define M_ENDLINE 257 /* internal code for $ */
				121	#define M_BEGWORD 258 /* internal code for \< */
				122	#define M_ENDWORD 259 /* internal code for \> */
				123	#define M_ANY 260 /* internal code for . */
				124	#define M_SPLAT 261 /* internal code for * */
				125	#define M_PLUS 262 /* internal code for \+ */
				126	#define M_QMARK 263 /* internal code for \? */
				127	#define M_CLASS(n) (264+(n)) /* internal code for [] */
				128	#define M_START(n) (274+(n)) /* internal code for \( */
				129	#define M_END(n) (284+(n)) /* internal code for \) */
				130
				131	/* These are used during compilation */
				132	static int class_cnt; /* used to assign class IDs */
				133	static int start_cnt; /* used to assign start IDs */
				134	static int end_stk[NSUBEXP];/* used to assign end IDs */
				135	static int end_sp;
				136	static char retext; / points to the text being compiled */
				137
				138	/* error-handling stuff */
				139	jmp_buf errorhandler;
Eric Andersen	b6a44b8	1999-11-13 04:47:09 +0000	[diff] [blame]	140	#define FAIL(why) do {fprintf(stderr, why); longjmp(errorhandler, 1);} while (0)
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	141
				142
				143
				144
				145	/* This function builds a bitmap for a particular class */
				146	/* text -- start of the class */
				147	/* bmap -- the bitmap */
				148	static char makeclass(char text, char* bmap)
				149	{
				150	int i;
				151	int complement = 0;
				152
				153
				154	/* zero the bitmap */
				155	for (i = 0; bmap && i < 32; i++)
				156	{
				157	bmap[i] = 0;
				158	}
				159
				160	/* see if we're going to complement this class */
				161	if (*text == '^')
				162	{
				163	text++;
				164	complement = 1;
				165	}
				166
				167	/* add in the characters */
				168	while (text && text != ']')
				169	{
				170	/* is this a span of characters? */
				171	if (text[1] == '-' && text[2])
				172	{
				173	/* spans can't be backwards */
				174	if (text[0] > text[2])
				175	{
				176	FAIL("Backwards span in []");
				177	}
				178
				179	/* add each character in the span to the bitmap */
				180	for (i = text[0]; bmap && i <= text[2]; i++)
				181	{
				182	bmap[i >> 3] \|= (1 << (i & 7));
				183	}
				184
				185	/* move past this span */
				186	text += 3;
				187	}
				188	else
				189	{
				190	/* add this single character to the span */
				191	i = *text++;
				192	if (bmap)
				193	{
				194	bmap[i >> 3] \|= (1 << (i & 7));
				195	}
				196	}
				197	}
				198
				199	/* make sure the closing ] is missing */
				200	if (*text++ != ']')
				201	{
				202	FAIL("] missing");
				203	}
				204
				205	/* if we're supposed to complement this class, then do so */
				206	if (complement && bmap)
				207	{
				208	for (i = 0; i < 32; i++)
				209	{
				210	bmap[i] = ~bmap[i];
				211	}
				212	}
				213
				214	return text;
				215	}
				216
				217
				218
				219
				220	/* This function gets the next character or meta character from a string.
				221	* The pointer is incremented by 1, or by 2 for \-quoted characters. For [],
				222	* a bitmap is generated via makeclass() (if re is given), and the
				223	* character-class text is skipped.
				224	*/
				225	static int gettoken(sptr, re)
				226	char **sptr;
				227	regexp *re;
				228	{
				229	int c;
				230
				231	c = **sptr;
				232	++*sptr;
				233	if (c == '\\')
				234	{
				235	c = **sptr;
				236	++*sptr;
				237	switch (c)
				238	{
				239	case '<':
				240	return M_BEGWORD;
				241
				242	case '>':
				243	return M_ENDWORD;
				244
				245	case '(':
				246	if (start_cnt >= NSUBEXP)
				247	{
				248	FAIL("Too many \\(s");
				249	}
				250	end_stk[end_sp++] = start_cnt;
				251	return M_START(start_cnt++);
				252
				253	case ')':
				254	if (end_sp <= 0)
				255	{
				256	FAIL("Mismatched \\)");
				257	}
				258	return M_END(end_stk[--end_sp]);
				259
				260	case '*':
				261	return M_SPLAT;
				262
				263	case '.':
				264	return M_ANY;
				265
				266	case '+':
				267	return M_PLUS;
				268
				269	case '?':
				270	return M_QMARK;
				271
				272	default:
				273	return c;
				274	}
				275	}
				276	else {
				277	switch (c)
				278	{
				279	case '^':
				280	if (*sptr == retext + 1)
				281	{
				282	return M_BEGLINE;
				283	}
				284	return c;
				285
				286	case '$':
				287	if (!**sptr)
				288	{
				289	return M_ENDLINE;
				290	}
				291	return c;
				292
				293	case '.':
				294	return M_ANY;
				295
				296	case '*':
				297	return M_SPLAT;
				298
				299	case '[':
				300	/* make sure we don't have too many classes */
				301	if (class_cnt >= 10)
				302	{
				303	FAIL("Too many []s");
				304	}
				305
				306	/* process the character list for this class */
				307	if (re)
				308	{
				309	/* generate the bitmap for this class */
				310	sptr = makeclass(sptr, re->program + 1 + 32 * class_cnt);
				311	}
				312	else
				313	{
				314	/* skip to end of the class */
				315	sptr = makeclass(sptr, (char *)0);
				316	}
				317	return M_CLASS(class_cnt++);
				318
				319	default:
				320	return c;
				321	}
				322	}
				323	/NOTREACHED/
				324	}
				325
				326
				327
				328
				329	/* This function calculates the number of bytes that will be needed for a
				330	* compiled RE. Its argument is the uncompiled version. It is not clever
				331	* about catching syntax errors; that is done in a later pass.
				332	*/
				333	static unsigned calcsize(text)
				334	char *text;
				335	{
				336	unsigned size;
				337	int token;
				338
				339	retext = text;
				340	class_cnt = 0;
				341	start_cnt = 1;
				342	end_sp = 0;
				343	size = 5;
				344	while ((token = gettoken(&text, (regexp *)0)) != 0)
				345	{
				346	if (IS_CLASS(token))
				347	{
				348	size += 34;
				349	}
				350	else if (IS_META(token))
				351	{
				352	size += 2;
				353	}
				354	else
				355	{
				356	size++;
				357	}
				358	}
				359
				360	return size;
				361	}
				362
				363
				364
				365	/---------------------------------------------------------------------------/
				366
				367
				368	/* This function checks for a match between a character and a token which is
				369	* known to represent a single character. It returns 0 if they match, or
				370	* 1 if they don't.
				371	*/
				372	static int match1(regexp* re, char ch, int token, int ignoreCase)
				373	{
				374	if (!ch)
				375	{
				376	/* the end of a line can't match any RE of width 1 */
				377	return 1;
				378	}
				379	if (token == M_ANY)
				380	{
				381	return 0;
				382	}
				383	else if (IS_CLASS(token))
				384	{
				385	if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7)))
				386	return 0;
				387	}
Erik Andersen	f4a99cc	2000-01-05 05:49:51 +0000	[diff] [blame]	388	//fprintf(stderr, "match1: ch='%c' token='%c': ", ch, token);
				389	if (ch == token
				390	\|\| (ignoreCase==TRUE && tolower(ch) == tolower(token)))
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	391	{
Erik Andersen	f4a99cc	2000-01-05 05:49:51 +0000	[diff] [blame]	392	//fprintf(stderr, "match\n");
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	393	return 0;
				394	}
Erik Andersen	f4a99cc	2000-01-05 05:49:51 +0000	[diff] [blame]	395	//fprintf(stderr, "no match\n");
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	396	return 1;
				397	}
				398
				399
				400
				401	/* This function checks characters up to and including the next closure, at
				402	* which point it does a recursive call to check the rest of it. This function
				403	* returns 0 if everything matches, or 1 if something doesn't match.
				404	*/
				405	/* re -- the regular expression */
				406	/* str -- the string */
				407	/* prog -- a portion of re->program, an compiled RE */
				408	/* here -- a portion of str, the string to compare it to */
				409	static int match(regexp* re, char* str, char* prog, char* here, int ignoreCase)
				410	{
				411	int token;
				412	int nmatched;
				413	int closure;
				414
				415	for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog))
				416	{
				417	switch (token)
				418	{
				419	/case M_BEGLINE: can't happen; re->bol is used instead /
				420	case M_ENDLINE:
				421	if (*here)
				422	return 1;
				423	break;
				424
				425	case M_BEGWORD:
				426	if (here != str &&
				427	(here[-1] == '_' \|\|
				428	(isascii(here[-1]) && isalnum(here[-1]))))
				429	return 1;
				430	break;
				431
				432	case M_ENDWORD:
				433	if ((here[0] == '_' \|\| isascii(here[0])) && isalnum(here[0]))
				434	return 1;
				435	break;
				436
				437	case M_START(0):
				438	case M_START(1):
				439	case M_START(2):
				440	case M_START(3):
				441	case M_START(4):
				442	case M_START(5):
				443	case M_START(6):
				444	case M_START(7):
				445	case M_START(8):
				446	case M_START(9):
				447	re->startp[token - M_START(0)] = (char *)here;
				448	break;
				449
				450	case M_END(0):
				451	case M_END(1):
				452	case M_END(2):
				453	case M_END(3):
				454	case M_END(4):
				455	case M_END(5):
				456	case M_END(6):
				457	case M_END(7):
				458	case M_END(8):
				459	case M_END(9):
				460	re->endp[token - M_END(0)] = (char *)here;
				461	if (token == M_END(0))
				462	{
				463	return 0;
				464	}
				465	break;
				466
				467	default: /* literal, M_CLASS(n), or M_ANY */
				468	if (match1(re, *here, token, ignoreCase) != 0)
				469	return 1;
				470	here++;
				471	}
				472	}
				473
				474	/* C L O S U R E */
				475
				476	/* step 1: see what we have to match against, and move "prog" to point
				477	* the the remainder of the compiled RE.
				478	*/
				479	closure = token;
				480	prog++, token = GET_META(prog);
				481	prog++;
				482
				483	/* step 2: see how many times we can match that token against the string */
				484	for (nmatched = 0;
				485	(closure != M_QMARK \|\| nmatched < 1) && here && match1(re, here, token, ignoreCase) == 0;
				486	nmatched++, here++)
				487	{
				488	}
				489
				490	/* step 3: try to match the remainder, and back off if it doesn't */
				491	while (nmatched >= 0 && match(re, str, prog, here, ignoreCase) != 0)
				492	{
				493	nmatched--;
				494	here--;
				495	}
				496
				497	/* so how did it work out? */
				498	if (nmatched >= ((closure == M_PLUS) ? 1 : 0))
				499	return 0;
				500	return 1;
				501	}
				502
				503
				504	/* This function compiles a regexp. */
				505	extern regexp regcomp(char text)
				506	{
				507	int needfirst;
				508	unsigned size;
				509	int token;
				510	int peek;
				511	char *build;
Eric Andersen	96bcfd3	1999-11-12 01:30:18 +0000	[diff] [blame]	512	regexp *re; // Ignore compiler whining. If we longjmp, we don't use re anymore.
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	513
				514
				515	/* prepare for error handling */
				516	re = (regexp *)0;
				517	if (setjmp(errorhandler))
				518	{
				519	if (re)
				520	{
				521	free(re);
				522	}
				523	return (regexp *)0;
				524	}
				525
				526	/* if an empty regexp string was given, use the previous one */
				527	if (*text == 0)
				528	{
				529	if (!previous)
				530	{
				531	FAIL("No previous RE");
				532	}
				533	text = previous;
				534	}
				535	else /* non-empty regexp given, so remember it */
				536	{
				537	if (previous)
				538	free(previous);
				539	previous = (char *)malloc((unsigned)(strlen(text) + 1));
				540	if (previous)
				541	strcpy(previous, text);
				542	}
				543
				544	/* allocate memory */
				545	class_cnt = 0;
				546	start_cnt = 1;
				547	end_sp = 0;
				548	retext = text;
				549	size = calcsize(text) + sizeof(regexp);
				550	re = (regexp *)malloc((unsigned)size);
				551
				552	if (!re)
				553	{
				554	FAIL("Not enough memory for this RE");
				555	}
				556
				557	/* compile it */
				558	build = &re->program[1 + 32 * class_cnt];
				559	re->program[0] = class_cnt;
				560	for (token = 0; token < NSUBEXP; token++)
				561	{
				562	re->startp[token] = re->endp[token] = (char *)0;
				563	}
				564	re->first = 0;
				565	re->bol = 0;
				566	re->minlen = 0;
				567	needfirst = 1;
				568	class_cnt = 0;
				569	start_cnt = 1;
				570	end_sp = 0;
				571	retext = text;
				572	for (token = M_START(0), peek = gettoken(&text, re);
				573	token;
				574	token = peek, peek = gettoken(&text, re))
				575	{
				576	/* special processing for the closure operator */
				577	if (IS_CLOSURE(peek))
				578	{
				579	/* detect misuse of closure operator */
				580	if (IS_START(token))
				581	{
				582	FAIL("* or \\+ or \\? follows nothing");
				583	}
				584	else if (IS_META(token) && token != M_ANY && !IS_CLASS(token))
				585	{
				586	FAIL("* or \\+ or \\? can only follow a normal character or . or []");
				587	}
				588
				589	/* it is okay -- make it prefix instead of postfix */
				590	ADD_META(build, peek);
				591
				592	/* take care of "needfirst" - is this the first char? */
				593	if (needfirst && peek == M_PLUS && !IS_META(token))
				594	{
				595	re->first = token;
				596	}
				597	needfirst = 0;
				598
				599	/* we used "peek" -- need to refill it */
				600	peek = gettoken(&text, re);
				601	if (IS_CLOSURE(peek))
				602	{
				603	FAIL("* or \\+ or \\? doubled up");
				604	}
				605	}
				606	else if (!IS_META(token))
				607	{
				608	/* normal char is NOT argument of closure */
				609	if (needfirst)
				610	{
				611	re->first = token;
				612	needfirst = 0;
				613	}
				614	re->minlen++;
				615	}
				616	else if (token == M_ANY \|\| IS_CLASS(token))
				617	{
				618	/* . or [] is NOT argument of closure */
				619	needfirst = 0;
				620	re->minlen++;
				621	}
				622
				623	/* the "token" character is not closure -- process it normally */
				624	if (token == M_BEGLINE)
				625	{
				626	/* set the BOL flag instead of storing M_BEGLINE */
				627	re->bol = 1;
				628	}
				629	else if (IS_META(token))
				630	{
				631	ADD_META(build, token);
				632	}
				633	else
				634	{
				635	*build++ = token;
				636	}
				637	}
				638
				639	/* end it with a \) which MUST MATCH the opening \( */
				640	ADD_META(build, M_END(0));
				641	if (end_sp > 0)
				642	{
				643	FAIL("Not enough \\)s");
				644	}
				645
				646	return re;
				647	}
				648
				649
				650
				651
				652	/* This function searches through a string for text that matches an RE. */
				653	/* re -- the compiled regexp to search for */
				654	/* str -- the string to search through */
				655	/* bol -- does str start at the beginning of a line? (boolean) */
				656	/* ignoreCase -- ignoreCase or not */
				657	extern int regexec(struct regexp* re, char* str, int bol, int ignoreCase)
				658	{
				659	char prog; / the entry point of re->program */
				660	int len; /* length of the string */
				661	char *here;
				662
				663	/* if must start at the beginning of a line, and this isn't, then fail */
				664	if (re->bol && bol==TRUE)
				665	{
				666	return FALSE;
				667	}
				668
				669	len = strlen(str);
				670	prog = re->program + 1 + 32 * re->program[0];
				671
				672	/* search for the RE in the string */
				673	if (re->bol)
				674	{
				675	/* must occur at BOL */
				676	if ((re->first
				677	&& match1(re, (char )str, re->first, ignoreCase))/* wrong first letter? */
				678	\|\| len < re->minlen /* not long enough? */
				679	\|\| match(re, (char )str, prog, str, ignoreCase)) / doesn't match? */
				680	return FALSE; /* THEN FAIL! */
				681	}
				682	else if (ignoreCase == FALSE)
				683	{
				684	/* can occur anywhere in the line, noignorecase */
				685	for (here = (char *)str;
				686	(re->first && re->first != *here)
				687	\|\| match(re, (char *)str, prog, here, ignoreCase);
				688	here++, len--)
				689	{
				690	if (len < re->minlen)
				691	return FALSE;
				692	}
				693	}
				694	else
				695	{
				696	/* can occur anywhere in the line, ignorecase */
				697	for (here = (char *)str;
				698	(re->first && match1(re, *here, (int)re->first, ignoreCase))
				699	\|\| match(re, (char *)str, prog, here, ignoreCase);
				700	here++, len--)
				701	{
				702	if (len < re->minlen)
				703	return FALSE;
				704	}
				705	}
				706
				707	/* if we didn't fail, then we must have succeeded */
				708	return TRUE;
				709	}
				710
				711
				712
				713
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	714	#if defined BB_SED
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	715	/* This performs substitutions after a regexp match has been found. */
				716	extern void regsub(regexp* re, char* src, char* dst)
				717	{
				718	char *cpy;
				719	char *end;
				720	char c;
				721	char *start;
				722	int mod;
				723
				724	mod = 0;
				725
				726	start = src;
				727	while ((c = *src++) != '\0')
				728	{
				729	/* recognize any meta characters */
				730	if (c == '&')
				731	{
				732	cpy = re->startp[0];
				733	end = re->endp[0];
				734	}
				735	else if (c == '~')
				736	{
				737	cpy = previous1;
				738	if (cpy)
				739	end = cpy + strlen(cpy);
				740	}
				741	else
				742	if (c == '\\')
				743	{
				744	c = *src++;
				745	switch (c)
				746	{
				747	case '0':
				748	case '1':
				749	case '2':
				750	case '3':
				751	case '4':
				752	case '5':
				753	case '6':
				754	case '7':
				755	case '8':
				756	case '9':
				757	/* \0 thru \9 mean "copy subexpression" */
				758	c -= '0';
				759	cpy = re->startp[(int)c];
				760	end = re->endp[(int)c];
				761	break;
				762	case 'U':
				763	case 'u':
				764	case 'L':
				765	case 'l':
				766	/* \U and \L mean "convert to upper/lowercase" */
				767	mod = c;
				768	continue;
				769
				770	case 'E':
				771	case 'e':
				772	/* \E ends the \U or \L */
				773	mod = 0;
				774	continue;
				775	case '&':
				776	/* "\&" means "original text" */
				777	*dst++ = c;
				778	continue;
				779
				780	case '~':
				781	/* "\~" means "previous text, if any" */
				782	*dst++ = c;
				783	continue;
				784	default:
				785	/* ordinary char preceded by backslash */
				786	*dst++ = c;
				787	continue;
				788	}
				789	}
				790	else
				791	{
				792	/* ordinary character, so just copy it */
				793	*dst++ = c;
				794	continue;
				795	}
				796
				797	/* Note: to reach this point in the code, we must have evaded
				798	* all "continue" statements. To do that, we must have hit
				799	* a metacharacter that involves copying.
				800	*/
				801
				802	/* if there is nothing to copy, loop */
				803	if (!cpy)
				804	continue;
				805
				806	/* copy over a portion of the original */
				807	while (cpy < end)
				808	{
				809	switch (mod)
				810	{
				811	case 'U':
				812	case 'u':
				813	/* convert to uppercase */
				814	if (isascii(cpy) && islower(cpy))
				815	{
				816	dst++ = toupper(cpy);
				817	cpy++;
				818	}
				819	else
				820	{
				821	dst++ = cpy++;
				822	}
				823	break;
				824
				825	case 'L':
				826	case 'l':
				827	/* convert to lowercase */
				828	if (isascii(cpy) && isupper(cpy))
				829	{
				830	dst++ = tolower(cpy);
				831	cpy++;
				832	}
				833	else
				834	{
				835	dst++ = cpy++;
				836	}
				837	break;
				838
				839	default:
				840	/* copy without any conversion */
				841	dst++ = cpy++;
				842	}
				843
				844	/* \u and \l end automatically after the first char */
				845	if (mod && (mod == 'u' \|\| mod == 'l'))
				846	{
				847	mod = 0;
				848	}
				849	}
				850	}
				851	*dst = '\0';
				852
				853	/* remember what text we inserted this time */
				854	if (previous1)
				855	free(previous1);
				856	previous1 = (char *)malloc((unsigned)(strlen(start) + 1));
				857	if (previous1)
				858	strcpy(previous1, start);
				859	}
Eric Andersen	7f1acfd	1999-10-29 23:09:13 +0000	[diff] [blame]	860	#endif
Eric Andersen	aa0765e	1999-10-22 04:30:20 +0000	[diff] [blame]	861
				862	#endif /* BB_REGEXP */
				863
				864