a99  V32.6
allegro Windows Hauptprogramm
 Alle Klassen Dateien Funktionen Variablen Typdefinitionen Aufzählungen Aufzählungswerte Makrodefinitionen
regex.cpp
gehe zur Dokumentation dieser Datei
1 /*
2  * regex - Regular expression pattern matching and replacement
3  *
4  * By: Ozan S. Yigit (oz)
5  * Dept. of Computer Science
6  * York University
7  *
8  * Modified and extended by: B. Eversberg, UB Braunschweig 2006-12
9  * modifications marked _ev_ and converted into C++
10  *
11  * These routines are the PUBLIC DOMAIN equivalents of regex
12  * routines as found in 4.nBSD UN*X, with minor extensions.
13  *
14  * These routines are derived from various implementations found
15  * in software tools books, and Conroy's grep. They are NOT derived
16  * from licensed/restricted software.
17  * For more interesting/academic/complicated implementations,
18  * see Henry Spencer's regexp routines, or GNU Emacs pattern
19  * matching module.
20  *
21  * See also: http://www.pcre.org/ Perl Compatible RE
22  *
23  * Interfaces:
24  * re_comp: compile a regular expression into a NFA.
25  *
26  * char *re_comp(s) // Ergebnis: reg.Expr. in nfb
27  * char *s; // return: 0 or Error message
28  *
29  * re_exec: execute a composite expression, like a / b + c - d
30  * sg_exec: execute a single NFA to match a pattern
31  * int re_exec(s)
32  * char *s;
33  *
34  * re_modw change re_exec's understanding of what a "word"
35  * looks like (for < and >) by adding into the
36  * hidden word-syntax table.
37  *
38  * void re_modw(s)
39  * char *s;
40  *
41  * re_subs: substitute the matched portions in a new string.
42  *
43  * int re_subs(src, dst)
44  * char *src;
45  * char *dst;
46  *
47  * re_fail: failure routine for re_exec.
48  *
49  * void re_fail(msg, op)
50  * char *msg;
51  * char op;
52  *
53  * Regular Expressions:
54  *
55  * [1] char matches itself, unless it is a special
56  * character (metachar): . \ [ ] * + ^ $
57  *
58  * [2] . matches any character.
59  *
60  * [3] \ matches the character following it, except
61  * when followed by a left or right round bracket,
62  * a digit 1 to 9 or a left or right angle bracket.
63  * (see [7], [8] and [9])
64  * It is used as an escape character for all
65  * other meta-characters, and itself. When used
66  * in a set ([4]), it is treated as an ordinary
67  * character.
68  *
69  * [4] [set] matches one of the characters in the set.
70  * If the first character in the set is "^",
71  * it matches a character NOT in the set, i.e.
72  * complements the set. A shorthand S-E is
73  * used to specify a set of characters S upto
74  * E, inclusive. The special characters "]" and
75  * "-" have no special meaning if they appear
76  * as the first chars in the set.
77  * examples: match:
78  *
79  * [a-z] any lowercase alpha
80  *
81  * [^]-] any char except ] and -
82  *
83  * [^A-Z] any char except uppercase
84  * alpha
85  *
86  * [a-zA-Z] any alpha
87  *
88  * [5] * any regular expression form [1] to [4], followed by
89  * closure char (*) matches zero or more matches of
90  * that form.
91  *
92  * [6] + same as [5], except it matches one or more.
93  *
94  * [7] a regular expression in the form [1] to [10], enclosed
95  * as \(form\) matches what form matches. The enclosure
96  * creates a set of tags, used for [8] and for
97  * pattern substution. The tagged forms are numbered
98  * starting from 1.
99  *
100  * [8] a \ followed by a digit 1 to 9 matches whatever a
101  * previously tagged regular expression ([7]) matched.
102  *
103  * [9] < a regular expression starting with a < construct
104  * > and/or ending with a > construct, restricts the
105  * pattern matching to the beginning of a word, and/or
106  * the end of a word. A word is defined to be a character
107  * string beginning and/or ending with the characters
108  * A-Z a-z 0-9 and _. It must also be preceded and/or
109  * followed by any character outside those mentioned.
110  *
111  * [10] a composite regular expression xy where x and y
112  * are in the form [1] to [10] matches the longest
113  * match of x followed by a match for y.
114  *
115  * [11] ^ a regular expression starting with a ^ character
116  * $ and/or ending with a $ character, restricts the
117  * pattern matching to the beginning of the line,
118  * or the end of line. [anchors] Elsewhere in the
119  * pattern, ^ and $ are treated as ordinary characters.
120  *
121  *
122  * Acknowledgements:
123  *
124  * HCR's Hugh Redelmeier has been most helpful in various
125  * stages of development. He convinced me to include BOW
126  * and EOW constructs, originally invented by Rob Pike at
127  * the University of Toronto.
128  *
129  * References:
130  * Software tools Kernighan & Plauger
131  * Software tools in Pascal Kernighan & Plauger
132  * Grep [rsx-11 C dist] David Conroy
133  * ed - text editor Un*x Programmer's Manual
134  * Advanced editing on Un*x B. W. Kernighan
135  * RegExp routines Henry Spencer
136  *
137  * Notes:
138  *
139  * This implementation uses a bit-set representation for character
140  * classes for speed and compactness. Each character is represented
141  * by one bit in a 128-bit block. Thus, CCL always takes a
142  * constant 16 bytes in the internal nfa, and re_exec does a single
143  * bit comparison to locate the character in the set.
144  *
145  * Examples:
146  *
147  * pattern: foo*.*
148  * compile: CHR f CHR o CLO CHR o END CLO ANY END END
149  * matches: fo foo fooo foobar fobar foxx ...
150  *
151  * pattern: fo[ob]a[rz]
152  * compile: CHR f CHR o CCL bitset CHR a CCL bitset END
153  * matches: fobar fooar fobaz fooaz
154  *
155  * pattern: foo\\+
156  * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END
157  * matches: foo\ foo\\ foo\\\ ...
158  *
159  * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo)
160  * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
161  * matches: foo1foo foo2foo foo3foo
162  *
163  * pattern: \(fo.*\)-\1
164  * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
165  * matches: foo-foo fo-fo fob-fob foobar-foobar ...
166  */
167 
168 // #include "regex.h"
169 
170 // _ev_ added:
171 #include "allegro.hpp"
172 
173 
174 // _ev_ allow 2000 rather than 1000 byte
175 #define MAXNFA 2046
176 #define MAXTAG 10
177 
178 #define OKP 1
179 #define NOP 0
180 
181 #define CHR 1
182 #define ANY 2
183 #define CCL 3
184 #define BOL 4
185 #define EOL 5
186 #define BOT 6
187 #define EOT 7
188 #define BOW 8
189 #define EOW 9
190 #define REF 10
191 #define CLO 11
192 
193 // _ev_ new tokens
194 #define LET 12
195 #define EQL 14
196 #define GTR 15
197 #define LSS 16
198 
199 #define BSP 31
200 
201 #define END 0
202 
203 /*
204  * The following defines are not meant to be changeable.
205  * They are for readability only.
206  */
207 #define MAXCHR 128
208 #define CHRBIT 8
209 #define BITBLK MAXCHR/CHRBIT
210 #define BLKIND 0170
211 #define BITIND 07
212 
213 #define ASCIIB 0177
214 
215 #ifdef NO_UCHAR
216 typedef char Char;
217 #else
218 typedef unsigned char Char;
219 #endif
220 
221 static int tagstk[MAXTAG]; /* subpat tag stack..*/
222 
223 static Char *nfa;
224 //static Char nfa[MAXNFA]; /* automaton.. */
225 
226 static Char nfb[MAXNFA]; // kompilierter reg. Ausdruck
227 static Char *nfc[100]; // 100 ptrs into nfb
228 static int nfi; // total num of nfc's
229 
230 // static Char nfb[MAXNFA]; /* backup of it */
231 // static Char nfp[MAXNFA]; /* part of it */
232 
233 static int sta = NOP; /* status of lastpat */
234 
235 static Char bittab[BITBLK]; /* bit table for CCL */
236  /* pre-set bits... */
237 static Char bitarr[] = {1,2,4,8,16,32,64,128};
238 
239 #ifdef DEBUG
240 static void nfadump(Char *);
241 void symbolic(char *);
242 #endif
243 
244 static void
245 chset(Char c)
246 {
247  bittab[(Char) ((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND];
248 }
249 
250 #define badpat(x) (*nfc[nfi] = END, x)
251 #define store(x) *mp++ = x
252 
253 char *
254 re_comp(char *pat) // pattern "pat" in nfa umwandeln
255 {
256  char *p; /* pattern pointer */
257  Char *mp=nfb; /* nfa pointer */
258  Char *lp; /* saved pointer.. */
259  Char *sp=nfb; /* another one.. */
260 
261  nfi=0;
262  nfc[0] = nfb;
263 
264  int tagi = 0; /* tag stack index */
265  int tagc = 1; /* actual tag count */
266 
267  int n;
268  Char mask; /* xor mask -CCL/NCL */
269  int c1, c2;
270 
271  if (!pat || !*pat)
272  if (sta)
273  return 0;
274  else
275  return badpat("No previous regular expression");
276  sta = NOP;
277 
278  p=pat;
279 
280  if(*p=='_') store(*p++);
281  if(*p=='-') store(*p++);
282 
283  for (p; *p; p++) {
284  lp = mp;
285  switch(*p) {
286 
287  case '.': /* match any char.. */
288  store(ANY);
289  break;
290 
291  case '^': /* match beginning.. */
292  case -73: // wenn signed char!
293  case 183: // wenn unsigned char!
294  if (p == pat || (p==pat+1 && *pat=='_'))
295  store(BOL); // nur wenn erstes Zeichen des Suchbegriffs
296  else {
297  store(CHR);
298  store(94);
299  }
300  break;
301 
302  case '$': /* match endofline.. */
303  if (!*(p+1))
304  store(EOL);
305  else {
306  store(CHR);
307  store(*p);
308  }
309  break;
310 
311  case '[': /* match char class..*/
312  store(CCL);
313 
314  if (*++p == '^') {
315  mask = 0377;
316  p++;
317  }
318  else
319  mask = 0;
320 
321  if (*p == '-') /* real dash */
322  chset(*p++);
323  if (*p == ']') /* real brac */
324  chset(*p++);
325  while (*p && *p != ']') {
326  if (*p == '-' && *(p+1) && *(p+1) != ']') {
327  p++;
328  c1 = *(p-2) + 1;
329  c2 = *p++;
330  if(c1>c2) return badpat("Error in [ ] / Fehler in [ ]"); // -ev- NEU
331  else while (c1 <= c2)
332  chset((Char)c1++);
333  }
334 #ifdef EXTEND
335  else if (*p == '\\' && *(p+1)) {
336  p++;
337  chset(*p++);
338  }
339 #endif
340  else
341  chset(*p++);
342  }
343  if (!*p)
344  return badpat("Missing ] / ] fehlt");
345 
346  for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
347  store(mask ^ bittab[n]);
348 
349  break;
350 
351  case '*': /* match 0 or more.. */
352  case '+': /* match 1 or more.. */
353  if (p == pat)
354  return badpat("Empty closure / *+ nicht am Anfang!");
355  lp = sp; /* previous opcode */
356  if (*lp == CLO) /* equivalence.. */
357  break;
358  switch(*lp) {
359 
360  case BOL:
361  case BOT:
362  case EOT:
363  case BOW:
364  case EOW:
365  case REF:
366  return badpat("Illegal closure / * nicht sinnvoll");
367  default:
368  break;
369  }
370 
371  if (*p == '+')
372  for (sp = mp; lp < sp; lp++)
373  store(*lp);
374 
375  store(END);
376  store(END);
377  sp = mp;
378  while (--mp > lp)
379  *mp = mp[-1];
380  store(CLO);
381  mp = sp;
382  break;
383 
384  case '\\': /* tags, backrefs .. */
385  switch(*++p) {
386 
387  case '(':
388  if (tagc < MAXTAG) {
389  tagstk[++tagi] = tagc;
390  store(BOT);
391  store(tagc++);
392  }
393  else
394  return badpat("Too many \\(\\) pairs / zu viele \\(\\)");
395  break;
396  case ')':
397  if (*sp == BOT)
398  return badpat("Null pattern inside \\(\\) / nichts zw. \\(\\)");
399  if (tagi > 0) {
400  store(EOT);
401  store(tagstk[tagi--]);
402  }
403  else
404  return badpat("Unmatched \\ / es fehlt ein \\)");
405  break;
406  case '<':
407  store(BOW);
408  break;
409  case '>':
410  if (*sp == BOW)
411  return badpat("Null pattern inside \\<\\> / nichts zwischen \\<\\>");
412  store(EOW);
413  break;
414  case '1':
415  case '2':
416  case '3':
417  case '4':
418  case '5':
419  case '6':
420  case '7':
421  case '8':
422  case '9':
423  n = *p-'0';
424  if (tagi > 0 && tagstk[tagi] == n)
425  return badpat("Cyclical reference / zyklische Referenz");
426  if (tagc > n) {
427  store(REF);
428  store(n);
429  }
430  else
431  return badpat("Undetermined reference / unbestimmte Referenz");
432  break;
433  case 'w':
434  store(LET);
435  break;
436  case 's': // space
437  store(CHR);
438  store(' ');
439  break;
440 #ifdef EXTEND
441  case 'b':
442  store(CHR);
443  store('\b');
444  break;
445  case 'n':
446  store(CHR);
447  store('\n');
448  break;
449  case 'f':
450  store(CHR);
451  store('\f');
452  break;
453  case 'r':
454  store(CHR);
455  store('\r');
456  break;
457  case 't':
458  store(CHR);
459  store('\t');
460  break;
461 #endif
462  default:
463  store(CHR);
464  store(*p);
465  }
466  break;
467 
468 // _ev_ begin
469 
470  case '=': // abc<xyz is abc followed by a number <xyz ?
471  store(EQL); // and the rest goes right after this:
472  while(*++p && *p!=31) store(*p);
473  --p;
474  break;
475 
476  case '<': // abc<xyz is abc followed by a number <xyz ?
477  if(p[1]=='<') { store(CHR); store('<'); ++p; break; } // find < itself
478  store(LSS); // and the rest goes right after this:
479  while(*++p && *p!=31) store(*p);
480  --p;
481  break;
482 
483  case '>': // abc>xyz after abc follows number >xyz
484  if(p[1]=='>') { store(CHR); store('>'); ++p; break; } // find > itself
485  store(GTR);
486  while(*++p && *p!=31) store(*p);
487  --p;
488  break;
489 
490  case BSP: // separator for boolean code : 31 31 + / -
491  if(p[1]==BSP)
492  {
493  if(nfi==100) return badpat("too many terms - 100 is limit");
494  ++p;
495  store(END);
496  nfc[++nfi]=mp;
497  store(*++p);
498  if(p[1]=='_') store(*++p);
499  if(p[1]=='-') store(*++p);
500  break;
501  }
502 // _ev_ end
503 
504  default : /* an ordinary char */
505  store(CHR);
506  store(*p);
507  break;
508  }
509  sp = lp;
510  }
511  if (tagi > 0)
512  return badpat("Unmatched \\( / Es fehlt \\(");
513  store(END);
514  sta = OKP;
515  return 0;
516 }
517 
518 
519 // _ev_ renamed sg_exec!
520 // re_exec() does Boolean operations
521 
522 static char *bol;
523 char *bopat[MAXTAG];
524 char *eopat[MAXTAG];
525 static char *pmatch(char *, Char *);
526 
527 /*
528  * sg_exec:
529  * execute a single nfa to find a match.
530  *
531  * special cases: (nfa[0])
532  * BOL
533  * Match only once, starting from the
534  * beginning.
535  * CHR
536  * First locate the character without
537  * calling pmatch, and if found, call
538  * pmatch for the remaining string.
539  * END
540  * re_comp failed, poor luser did not
541  * check for it. Fail fast.
542  *
543  * If a match is found, bopat[0] and eopat[0] are set
544  * to the beginning and the end of the matched fragment,
545  * respectively.
546  *
547  * return 0 if no match
548  *
549  */
550 
551 int
552 sg_exec(char *lp) // called from re_exec(), Applies nfa to lp
553 {
554  bool bm=0;
555  Char c;
556  char *ep = 0;
557  Char *ap = nfa;
558  if(*ap=='-') { ++ap; bm=1; } // -term : negative search
559  bol = lp;
560 
561  bopat[0] = 0;
562  bopat[1] = 0;
563  bopat[2] = 0;
564  bopat[3] = 0;
565  bopat[4] = 0;
566  bopat[5] = 0;
567  bopat[6] = 0;
568  bopat[7] = 0;
569  bopat[8] = 0;
570  bopat[9] = 0;
571 
572  switch(*ap) {
573 
574 // case BOL: /* anchored: match from BOL only -ev- */
575 // ep = pmatch(lp,ap);
576 // break;
577  case CHR: /* ordinary char: locate it fast */
578  c = *(ap+1);
579  while (*lp && *lp != c)
580  lp++;
581  if (!*lp) /* if EOS, fail, else fall thru. */
582  return bm;
583  default: /* regular matching all the way. */
584 #ifdef OLD
585  while (*lp) {
586  if ((ep = pmatch(lp,ap)))
587  break;
588  lp++;
589  }
590 #else /* match null string */
591  do {
592  if ((ep = pmatch(lp,ap)))
593  break;
594  lp++;
595  } while (*lp);
596 #endif
597  break;
598  case END: /* munged automaton. fail always */
599  return bm;
600  }
601  if (!ep)
602  return bm;
603 
604  bopat[0] = lp;
605  eopat[0] = ep;
606  return 1-bm;
607 }
608 
609 
610 /*
611  * re_exec:
612  * execute a composite (Boolean) expression.
613  * ce may contain several parts, separated by BSP BSP Op
614  * with Op = + / -
615  *
616  * return 0 if no match
617 */
618 
619 
620 // _ev_ modified!
621 
622 int
623 re_exec(char *ce)
624 {
625  // nfa contains the regex, ce the text
626  int e, k;
627  char c;
628  char *cet; cet=ce;
629 extern char inpu[]; // == iV = Suchbereich -ev-
630  int ni=0;
631  nfa=nfc[0];
632 //FILE *nf=fopen("nfa","w"); //TEST
633 //fprintf(nf,"%d %s\n",nfi,nfa);
634  if(*nfa=='_') { cet=inpu; ++nfa; }
635  e = sg_exec(cet);
636  if(nfi==0) ;
637  else while(ni<nfi)
638  {
639  ++ni;
640  nfa=nfc[ni];
641 //fprintf(nf,"%d %s\n",nfi,nfa);
642  cet=ce;
643  c=*nfa++; // command
644  if(*nfa=='_') { cet=inpu; ++nfa; }
645  switch(c)
646  {
647  case '+': // + AND
648  k = sg_exec(cet);
649  if(!k) e=0;
650  break;
651  case '/': // / OR
652  k = sg_exec(cet);
653  if(k || e) e=1;
654  break;
655  case '%': // % EXOR
656  k = sg_exec(cet);
657  if((k && !e) || (!k && e)) e=1; else e=0;
658  break;
659  case '-': // - ANDNOT
660  k = sg_exec(cet);
661  if(k) e=0;
662  break;
663  }
664  }
665 //fclose(nf);
666  return e;
667 }
668 
669 
670 /*
671  * pmatch: internal routine for the hard part
672  *
673  * This code is partly snarfed from an early grep written by
674  * David Conroy. The backref and tag stuff, and various other
675  * innovations are by oz.
676  *
677  * special case optimizations: (nfa[n], nfa[n+1])
678  * CLO ANY
679  * We KNOW .* will match everything upto the
680  * end of line. Thus, directly go to the end of
681  * line, without recursive pmatch calls. As in
682  * the other closure cases, the remaining pattern
683  * must be matched by moving backwards on the
684  * string recursively, to find a match for xy
685  * (x is ".*" and y is the remaining pattern)
686  * where the match satisfies the LONGEST match for
687  * x followed by a match for y.
688  * CLO CHR
689  * We can again scan the string forward for the
690  * single char and at the point of failure, we
691  * execute the remaining nfa recursively, same as
692  * above.
693  *
694  * At the end of a successful match, bopat[n] and eopat[n]
695  * are set to the beginning and end of subpatterns matched
696  * by tagged expressions (n = 1 to 9).
697  *
698  */
699 
700 #ifndef re_fail
701 extern void re_fail(char *, Char);
702 #endif
703 
704 /*
705  * character classification table for word boundary operators BOW
706  * and EOW. the reason for not using ctype macros is that we can
707  * let the user add into our own table. see re_modw. This table
708  * is not in the bitset form, since we may wish to extend it in the
709  * future for other character classifications.
710  *
711  * TRUE for 0-9 A-Z a-z _
712  */
713 static Char chrtyp[MAXCHR] = {
714  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
715  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
716  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
717  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
718  0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
719  1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
720  0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
721  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
722  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
723  1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
724  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
725  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
726  1, 1, 1, 0, 0, 0, 0, 0
727  };
728 
729 #define inascii(x) (0177&(x))
730 #define iswordc(x) chrtyp[inascii(x)]
731 #define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])
732 
733 /*
734  * skip values for CLO XXX to skip past the closure
735  */
736 
737 #define ANYSKIP 2 /* [CLO] ANY END ... */
738 #define CHRSKIP 3 /* [CLO] CHR chr END ... */
739 #define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */
740 
741 static char *
742 pmatch(char *lp, Char *ap) // find ap in lp
743 {
744  extern int Skt;
745  int op, c, n;
746  char *e; /* extra pointer for CLO */
747  char *bp; /* beginning of subpat.. */
748  char *ep; /* ending of subpat.. */
749  char *are; /* to save the line ptr. */
750 
751  while ((op = *ap++) != END)
752  switch(op) {
753 
754  case CHR:
755  if (*lp++ != *ap++)
756  return 0;
757  break;
758  case ANY:
759  if (!*lp++)
760  return 0;
761  break;
762 
763 // _ev_ begin
764 
765  case LET: // letter
766  if (!isalpha(*lp) && *lp!='_')
767  return 0;
768  ++lp;
769  break;
770  case EQL: // equal
771  char *z; z=lp; // look for 1st digit or -
772  while(*z && (!isdigit(*z) || *z=='-')) ++z;
773  if(!*z) z=lp;
774  if(atol(z) == atol((char *)ap)) return lp;
775  else return 0;
776  case LSS: // less
777  z=lp;
778  if(isdigit(*ap) || *ap=='-')
779  {
780  while(*z && (!isdigit(*z) || *z=='-')) ++z;
781  if(!*z) z=lp;
782  if(atol(z) < atol((char *)ap)) return lp;
783  else return 0;
784  }
785  else // alpha compare
786  {
787  if(strcmp(z,ap)<0) return lp;
788  else return 0;
789  }
790  case GTR: // greater
791  z=lp;
792  if(isdigit(*ap) || *ap=='-')
793  {
794  while(*z && (!isdigit(*z) || *z=='-')) ++z;
795  if(!*z) z=lp;
796  if(atol(z) > atol((char *)ap)) return lp;
797  else return 0;
798  }
799  else // alpha compare
800  {
801  if(strcmp(z,ap)>0) return lp;
802  else return 0;
803  }
804 // _ev_ end
805 
806  case CCL:
807  c = *lp++;
808  if (!isinset(ap,c))
809  return 0;
810  ap += BITBLK;
811  break;
812  case BOL: // kommt nur an erster Position von nfa vor
813 extern char inpu[];
814  if(*inpu=='#') { if(lp[-Skt]!='#') return 0; } // special case: allegro field -ev-
815  else if (lp != bol) return 0;
816  break;
817  case EOL:
818  if (*lp==10 || *lp==13 || !*lp) ; // orig: if(!*lp) _ev_
819  else return 0;
820  break;
821  case BOT:
822  bopat[*ap++] = lp;
823  break;
824  case EOT:
825  eopat[*ap++] = lp;
826  break;
827  case BOW:
828  if (lp!=bol && iswordc(lp[-1]) || !iswordc(*lp))
829  return 0;
830  break;
831  case EOW:
832  if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))
833  return 0;
834  break;
835  case REF:
836  n = *ap++;
837  bp = bopat[n];
838  ep = eopat[n];
839  while (bp < ep)
840  if (*bp++ != *lp++)
841  return 0;
842  break;
843  case CLO:
844  are = lp;
845  switch(*ap) {
846 
847  case ANY:
848  while (*lp && *lp!=10) // match nur bis Feldende, ev
849  lp++;
850  n = ANYSKIP;
851  break;
852  case CHR:
853  c = *(ap+1);
854  while (*lp && c == *lp)
855  lp++;
856  n = CHRSKIP;
857  break;
858  case CCL:
859  while ((c = *lp) && isinset(ap+1,c))
860  lp++;
861  n = CCLSKIP;
862  break;
863  default:
864  re_fail("closure: expression not OK.", *ap);
865  return 0;
866  }
867 
868  ap += n;
869 
870  while (lp >= are) {
871  if (e = pmatch(lp, ap))
872  return e;
873  --lp;
874  }
875  return 0;
876  default:
877  re_fail("sg_exec: expression not ok.", op);
878  return 0;
879  }
880  return lp;
881 }
882 
883 /*
884  * re_modw:
885  * add new characters into the word table to change re_exec's
886  * understanding of what a word should look like. Note that we
887  * only accept additions into the word definition.
888  *
889  * If the string parameter is 0 or null string, the table is
890  * reset back to the default containing A-Z a-z 0-9 _. [We use
891  * the compact bitset representation for the default table]
892  */
893 
894 static Char deftab[16] = {
895  0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
896  0376, 0377, 0377, 007
897 };
898 
899 void
900 re_modw(char *s)
901 {
902  int i;
903 
904  if (!s || !*s) {
905  for (i = 0; i < MAXCHR; i++)
906  if (!isinset(deftab,i))
907  iswordc(i) = 0;
908  }
909  else
910  while(*s)
911  iswordc(*s++) = 1;
912 }
913 
914 /*
915  * re_subs:
916  * substitute the matched portions of the src in dst.
917  *
918  * & substitute the entire matched pattern.
919  *
920  * \digit substitute a subpattern, with the given tag number.
921  * Tags are numbered from 1 to 9. If the particular
922  * tagged subpattern does not exist, null is substituted.
923  */
924 
925 int
926 re_subs(char *src, char *dst)
927 {
928  char c;
929  int pin;
930  char *bp;
931  char *ep;
932 
933  if (!*src || !bopat[0])
934  return 0;
935 
936  while (c = *src++) {
937  switch(c) {
938 
939  case '&':
940  pin = 0;
941  break;
942 
943  case '\\':
944  c = *src++;
945  if (c >= '0' && c <= '9') {
946  pin = c - '0';
947  break;
948  }
949 
950  default:
951  *dst++ = c;
952  continue;
953  }
954 
955  if ((bp = bopat[pin]) && (ep = eopat[pin])) {
956  while (*bp && bp < ep)
957  *dst++ = *bp++;
958  if (bp < ep)
959  return 0;
960  }
961  }
962  *dst = (char) 0;
963  return 1;
964 }
965 
966 #ifdef DEBUG
967 /*
968  * symbolic - produce a symbolic dump of the nfa
969  */
970 /*
971 symbolic(char *s)
972 {
973  printf("pattern: %s\n", s);
974  printf("nfacode:\n");
975  nfadump(nfa);
976 }
977 */
978 
979 /*
980 static
981 nfadump(Char *ap)
982 {
983  int n;
984 
985  while (*ap != END)
986  switch(*ap++) {
987  case CLO:
988  printf("CLOSURE");
989  nfadump(ap);
990  switch(*ap) {
991  case CHR:
992  n = CHRSKIP;
993  break;
994  case ANY:
995  n = ANYSKIP;
996  break;
997  case CCL:
998  n = CCLSKIP;
999  break;
1000  }
1001  ap += n;
1002  break;
1003  case CHR:
1004  printf("\tCHR %c\n",*ap++);
1005  break;
1006  case ANY:
1007  printf("\tANY .\n");
1008  break;
1009  case BOL:
1010  printf("\tBOL -\n");
1011  break;
1012  case EOL:
1013  printf("\tEOL -\n");
1014  break;
1015  case BOT:
1016  printf("BOT: %d\n",*ap++);
1017  break;
1018  case EOT:
1019  printf("EOT: %d\n",*ap++);
1020  break;
1021  case BOW:
1022  printf("BOW\n");
1023  break;
1024  case EOW:
1025  printf("EOW\n");
1026  break;
1027  case REF:
1028  printf("REF: %d\n",*ap++);
1029  break;
1030  case CCL:
1031  printf("\tCCL [");
1032  for (n = 0; n < MAXCHR; n++)
1033  if (isinset(ap,(Char)n)) {
1034  if (n < ' ')
1035  printf("^%c", n ^ 0x040);
1036  else
1037  printf("%c", n);
1038  }
1039  printf("]\n");
1040  ap += BITBLK;
1041  break;
1042  default:
1043  printf("bad nfa. opcode %o\n", ap[-1]);
1044  exit(1);
1045  break;
1046  }
1047 }
1048 */
1049 #endif