a99  V32.6
allegro Windows Hauptprogramm
 Alle Klassen Dateien Funktionen Variablen Typdefinitionen Aufzählungen Aufzählungswerte Makrodefinitionen
parser.cpp
gehe zur Dokumentation dieser Datei
1 /* Module Name: @(#)api_origin_search_parser.c 7.5
2  Version: 7/3/97
3  Author: John Hough
4 
5  Crossnet Systems Ltd.
6  Unit 3, The Paddock,
7  Hambridge Road,
8  Newbury,
9  Berkshire, RG14 5TQ
10 
11  Date of Issue:
12 
13  QA Acceptance:
14  Date of Acceptance:
15 
16  Description: HQL Parser - yours to freely use and modify.
17  Chesty's gone, Shakespear's in.
18 
19  Processing...
20  Infix query -> infix vsl
21  infix vsl -> postfix vsl
22  postfix vsl -> postfix query (original
23  query operators and operands re-assigned).
24 
25  This code may be freely used/modified subject to the terms
26  and conditions of the GNU general public licence.
27 
28 
29 
30 /* here because of another m4 bug - if added after the
31  long comment below the directive is left in the file !! */
32 
33 
34 
35 #include "parser.hpp"
36 extern char Aerror[];
37 
38 /******************************************************/
39 /******************************************************/
40 Parser::Parser(char *Query){
41 
42  /* Initialize global variables */
43  query=Query;
44  infix_str = NULL;
45  infix_str_pos = 0;
46  infix_str_len = 0;
47  buf = 0;
48  index_num=0;
49  memset(postfix_str, 0, sizeof(postfix_str));
50  postfix_str_pos = 0;
51 }
52 
54  for(int i=0;i<index_num;i++)
55  delete index_array[i];
56 }
57 
58 /******************************************************/
59 /******************************************************/
/* Return 1 when ch is an opening or closing round bracket, otherwise 0. */
int braket(char ch)
{
    switch (ch) {
    case '(':
    case ')':
        return 1;
    default:
        return 0;
    }
}
68 
69 
70 void Parser::check_query_for_sep(char* query) {
71 int string_pos=0; char ch,ct=0;
72  while (string_pos <= strlen(query)-1) {
73 
74  /* If operand (e.g. CAT ) */
75  ch=query[string_pos];
76  if(ch=='"' || ch==39) // Klammern zwischen "" und '' nicht verarb.
77  while(1) { ct=query[++string_pos]; if(!ct || ct==ch) break; }
78  else if (braket(ch)) {
79  if(string_pos>0 && query[string_pos-1]!=/*TOKEN_SEP*/32){
80  memmove(query+string_pos+1,query+string_pos,strlen(query)-string_pos+1);
81  query[string_pos]=32;
82  string_pos++;
83  }
84  if(string_pos <= strlen(query)-2 && query[string_pos+1]!=/*TOKEN_SEP*/32){
85  memmove(query+string_pos+2,query+string_pos+1,strlen(query)-string_pos+1);
86  query[string_pos+1]=32;
87  string_pos++;
88  }
89  }
90  string_pos ++;
91  }
92 
93 }
94 
95 /* Function which takes an HQL query and returns a corresponding
96  DbvRPNStructure, all filled out */
98 {
99 
100  /* Type of query */
101  Querytype querytype = QT_Unknown;
102 
103 
104  /* Array of pointer to char* (index array !) */
105  char vsl_string[MAXVAL];
106  char *query_value;
107 
108  /* Array of Querytypes */
109  Querytype query_types[MAXVAL];
110  int last_query_type_index = 0;
111  query_value=new char[strlen(query)+10];
112  if(query_value){
113 
114  /* Initialise (reset) query types array */
115  memset(query_types, 0, sizeof(query_types));
116 
117  check_query_for_sep(query);// ggf. trennende Leerzeichen einfuegen
118 
119  strcpy(query_value, query);
120  /* Convert a query string to VSL infix */
121  query_val_to_vsl_infix(query_value, vsl_string, index_array);
122  delete[]query_value;
123 
124  infix_str = vsl_string;
125  infix_str_len = strlen(infix_str);
126 
127  /* Convert VSL infix to VSL postfix */
128  if (vsl_infix_to_vsl_postfix() == -1) return NULL;
129  }
130  return(NULL);
131 }
132 
133 
134 
135 int Parser::vsl_infix_to_vsl_postfix()
136 {
137 
138  /* m4 macro bug fudge -> '{' */
139  /* (m4 gets confused if the first quote pair it encounters
140  is '{') */
141 
142  if (!nextis('{')) {
143  strcpy(Aerror,"Query Error: Query not formed properly\n");
144  return -1;
145  }
146 
147  if (expression() == -1)
148  return -1;
149  if (!nextis('}')) {
150  strcpy(Aerror,"Query Error: Query not formed properly\n");
151  return -1;
152  }
153 
154  return 0;
155 }
156 
157 
158 
159 int Parser::querytypes_to_lower_case(char *query)
160 {
161  /* Only lower case the query type */
162  while ( (*query != '\0') && (*query != QUERY_TYPE_VAL_SEP_CHAR) ){
163  *query = tolower(*query);
164  query++;
165  }
166  return strlen(query);
167 }
168 
169 
170 /* Convert query value string to VSL */
171 int Parser::query_val_to_vsl_infix(char *input_string, char *vsl_string, IndexArray *index_array[])
172 {
173  /* first token -> a, second token -> b etc. */
174  /* ( -> (, ) -> ) */
175  /* and -> *, or-> +, andnot -> - */
176 
177  char *input_string_copy;
178  char *token_obtained;
179  int insert_pos = 0;
180  char *ptr = NULL;
181 
182  /* Copy to input_string_copy to avoid seg fault if input_string is
183  a symbolic constant (defined in symbol table) */
184  input_string_copy=new char[strlen(input_string)+10];
185  token_obtained=new char[strlen(input_string)+10];
186  if(input_string_copy && token_obtained){
187  strcpy(input_string_copy, input_string);
188 
189  /* Remove any leading spaces */
190  remove_leading(input_string_copy, ' ');
191  strcat(input_string_copy, TOKEN_SEP);
192 
193  /* Start the VSL expression */
194  strcpy(vsl_string, "{");
195 
196  /* Get first token */
197  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
198  while (strcmp(token_obtained, "NO SUCH TOKEN") != 0) {
199  /* If an operator */
200  if (operator_check(token_obtained))
201  operator_process(token_obtained, vsl_string, input_string_copy);
202  else
203  /* process an operand */
204  operand_process(token_obtained, vsl_string, input_string_copy, &insert_pos, index_array);
205  }
206 
207  /* End the vsl expression */
208  strcat(vsl_string, "}");
209  delete[]input_string_copy;
210  delete[]token_obtained;
211  }
212  return 1;
213 }
214 
215 
216 bool Parser::operator_check(char *token_obtained)
217 {
218  char lowercase_token[MAXOPERATOR]; // Kopie in Lowercase
219  if(strlen(token_obtained)>MAXOPERATOR-1)return FALSE;
220 
221  memset(lowercase_token, 0, MAXOPERATOR);
222  strncpy(lowercase_token,token_obtained,MAXOPERATOR-1);
223  querytypes_to_lower_case(lowercase_token);
224 
225  if ((strcmp(lowercase_token, OPERATOR_AND) == 0) ||
226  (strcmp(lowercase_token, OPERATOR_OR) == 0) ||
227  (strcmp(lowercase_token, OPERATOR_ANDNOT) == 0) ||
228  (strcmp(lowercase_token, OPENING_BRACKET) == 0) ||
229  (strcmp(lowercase_token, CLOSING_BRACKET) == 0))
230  return TRUE;
231 
232  return FALSE;
233 }
234 
235 
236 
237 int Parser::operator_process(char *token_obtained, char *vsl_string, char *input_string_copy)
238 {
239  char lowercase_token[MAXOPERATOR];// Kopie in Lowercase
240  memset(lowercase_token, 0, MAXOPERATOR);
241  strncpy(lowercase_token,token_obtained,MAXOPERATOR-1);
242  querytypes_to_lower_case(lowercase_token);
243 
244  if (strcmp(lowercase_token, OPERATOR_AND) == 0) {
245  strcat(vsl_string, "*");
246  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
247 
248  } else if (strcmp(lowercase_token, OPERATOR_OR) == 0) {
249  strcat(vsl_string, "+");
250  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
251 
252  } else if (strcmp(lowercase_token, OPERATOR_ANDNOT) == 0) {
253  strcat(vsl_string, "-");
254  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
255 
256  } else if (strcmp(lowercase_token, OPENING_BRACKET) == 0) {
257  strcat(vsl_string, OPENING_BRACKET);
258  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
259 
260  } else if (strcmp(lowercase_token, CLOSING_BRACKET) == 0) {
261  strcat(vsl_string, CLOSING_BRACKET);
262  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
263  }
264  return 1;
265 }
266 
267 
268 
269 int Parser::operand_process(char *token_obtained, char *vsl_string, char *input_string_copy, int *insert_pos, IndexArray *index_array[])
270 {
271  char *operands;
272  char temp[MAXVAL];
273  int iLen=strlen(token_obtained)+strlen(input_string_copy)+10;
274  if((operands=new char[iLen])!=NULL){
275  complex_operand_check_and_process(token_obtained, input_string_copy);
276  memset(operands, 0, iLen);
277 
278  /* Deal with first operand */
279  memcpy(operands, token_obtained, strlen(token_obtained));
280 
281  /* Process any remaining operands */
282  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
283 
284  /* While not and, or, andnot, (, ) */
285 
286  while ((operator_check(token_obtained) != TRUE) &&
287  (strcmp(token_obtained, "NO SUCH TOKEN") != 0)) {
288 
289  strcat(operands, TOKEN_SEP);
290  strcat(operands, token_obtained);
291 
292  api_get_token_from_string(token_obtained, TOKEN_SEP, input_string_copy);
293  }
294 
295 
296  /* +1 for the terminating \0 */
297  index_array[*insert_pos] = (IndexArray *) new IndexArray(strlen(operands) + 1);
298  strcpy(index_array[*insert_pos]->Term, operands);
299  index_num++;
300  memset(temp, 0, sizeof(temp));
301  sprintf(temp, "%c", '0' + *insert_pos);
302  strcat(vsl_string, temp);
303 
304  /* Increment the value in insert_pos, not the pointer ! */
305  (*insert_pos)++;
306  delete[]operands;
307  return 1;
308  }
309  else return 0;
310 }
311 
312 
313 
314 int Parser::complex_operand_check_and_process(char *token_obtained, char *input_string_copy)
315 {
316  /* Complex operand enclosed by / / (can't have / as part of
317  operand query in HQL) */
318 
319  char *ptr = NULL;
320  char *token_insert_pos = NULL;
321  char *ptr1 = NULL, *ptr2 = NULL;
322 
323  ptr = token_obtained;
324 
325  /* If the token obtained begins with a / */
326  if (*ptr == COMPLEX_SEP) {
327 
328  /* Skip past start delimiters */
329  while ((*ptr == COMPLEX_SEP) && (*ptr != 0))
330  ptr++;
331 
332  strcpy(token_obtained, ptr);
333  strcat(token_obtained, TOKEN_SEP);
334 
335 
336  ptr1 = token_obtained + strlen(token_obtained);
337 
338  /* Position just past token obtained in source string */
339  ptr2 = input_string_copy;
340 
341  while ((*ptr2 != COMPLEX_SEP) && (*ptr2 != 0))
342  *ptr1++ = *ptr2++;
343 
344  *ptr1 = 0;
345 
346  /* skip past multiple delimiters in input_string_copy */
347  while ((*ptr2 == COMPLEX_SEP) && (*ptr2 != 0))
348  ptr2++;
349 
350  /* Remove any leading spaces */
351  remove_leading(ptr2, ' ');
352  strcpy(input_string_copy, ptr2);
353 
354  }
355  return 1;
356 }
357 
358 
359 
360 int Parser::remove_leading(char *string, char char_to_remove)
361 {
362  char *ptr = NULL;
363 
364  ptr = string;
365 
366  while (*ptr == char_to_remove)
367  ptr++;
368 
369  strcpy(string, ptr);
370  return 1;
371 
372 }
373 
374 
375 
376 void Parser::api_get_token_from_string(char *token_obtained, char *token_delimiter, char *string)
377 {
378 
379  char *temp;
380  char *temp2;
381  char *strptr = NULL;
382  char* ptr;
383  temp=new char[strlen(string)+1];
384  temp2=new char[strlen(string)+1];
385  if(temp && temp2){
386 
387  /* Make 2 copies of string, one for strtok, one for strbrk */
388  remove_leading(string, ' ');//Fuehrende Leerzeichen
389  strcpy(temp, string);
390  strcpy(temp2, string);
391  // $$060425 Absicherung gegen fehlendes " oder ' am Ende
392  if(temp[0]=='"')
393  {
394  ptr=strchr(string+1,'"');
395  if(ptr)
396  { memcpy(token_obtained,string,ptr-string+1);
397  token_obtained[ptr-string+1]=0;
398  }
399  else { ptr=string+strlen(string)-1; strcpy(token_obtained,string); }
400  memmove(string,ptr+1,strlen(ptr));
401  }
402  else if(temp[0]==39)
403  {
404  ptr=strchr(string+1,39);
405  if(ptr)
406  { memcpy(token_obtained,string,ptr-string+1);
407  token_obtained[ptr-string+1]=0;
408  }
409  else { ptr=string+strlen(string)-1; strcpy(token_obtained,string); }
410  memmove(string,ptr+1,strlen(ptr));
411  }
412  else { /* See if a token exists */
413  strptr = strtok(temp, token_delimiter);
414 
415  /* If one does */
416  if (strptr != NULL) {
417 
418  /* Copy the token to token_obtained */
419  strcpy(token_obtained, strptr);
420 
421  /* Adjust string */
422 
423  /* Check if a delimiter follows the token */
424  strptr = strstr(temp2, token_delimiter);
425 
426  /* If one does */
427  if (strptr != NULL)
428 
429  /* Copy from after the token delimiter */
430  strcpy(string, strptr+strlen(token_delimiter));
431  else
432  /* If we have just processed the last token
433  in the string make the string empty */
434  strcpy(string, "");
435  } else
436  strcpy(token_obtained, "NO SUCH TOKEN");
437  }
438  delete[]temp;
439  delete[]temp2;
440  }
441 }
442 
443 
444 /* Get next non space character and check if character is that wanted.
445  If so return 1, if not, store character in buf and return 0 */
446 int Parser::nextis(char chgiven)
447 {
448 
449  char ch;
450 
451  ch = next();
452  if (ch != chgiven)
453  buf = ch;
454 
455  /* Return 0 or 1 */
456  return ch==chgiven;
457 }
458 
459 
460 
461 /* Get next non space character (from buf (if present) then string) */
462 int Parser::next(void)
463 {
464  int ch;
465 
466  do {
467  if (buf) {
468  ch = buf;
469  buf = 0;
470  } else {
471  ch = infix_str[infix_str_pos++];
472  }
473  } while (isspace(ch));
474  return ch;
475 }
476 
477 
478 
479 
480 
481 /* Convert following VSL expression from infix to postfix */
482 int Parser::expression(void)
483 {
484 
485  /* Get first digit (digit*digit if present) and add to postfix string
486  (digit*digit is converted to digit digit * before being added) */
487  if (term() == -1)
488  return -1;
489 
490  for(;;) {
491 
492  /* If next input character is + */
493  if (nextis('+')) {
494 
495  /* get next digit (digit*digit if present)
496  and add to postfix string
497  (digit*digit is converted to digit digit *
498  before being added) */
499  if (term() == -1)
500  return -1;
501 
502 
503  /* then add the + to the postfix string */
504  postfix_str[postfix_str_pos++] = '+';
505  } else
506  if (nextis('-')) {
507  if (term() == -1)
508  return -1;
509  postfix_str[postfix_str_pos++] = '-';
510  } else
511  break;
512  }
513 
514  return 1;
515 }
516 
517 
518 
519 
520 int Parser::term(void)
521 {
522 
523  /* Get next digit and add to postfix string being formed */
524  if (factor() == -1)
525  return -1;
526 
527  /* If the next character is a *, DONT yet add it
528  to the postfix string being formed. We now know though that
529  we have a digit*digit infix expression so ...*/
530  while (nextis('*')) {
531 
532  /* Get second digit and add to postfix string */
533  if (factor() == -1)
534  return -1;
535 
536  /* NOW add the * found earlier to postfix string */
537  postfix_str[postfix_str_pos++] = '*';
538 
539  }
540 
541  return 1;
542 }
543 
544 
545 
546 /* Get next digit from infix string and add to postfix string */
547 int Parser::factor(void)
548 {
549  char ch;
550 
551  /* Get next non space character from infix string */
552  ch=next();
553  if (ch>47) {
554 
555  /* Add digit (e.g. 1) to postfix string */
556  postfix_str[postfix_str_pos++] = ch;
557 
558 
559  /* If we dont have a next digit then we can have a complete
560  rpn sub expression in brackets ! */
561  } else
562  if (ch == '(') {
563  if (expression() == -1)
564  return -1;
565  if (!nextis(')')) {
566  sprintf(Aerror,"Query Error: ')' expected\n");
567  return -1;
568  }
569 
570  } else {
571  sprintf(Aerror,"Query Error: Operand or '(' expected\n");
572  return -1;
573  }
574 
575 
576  return 1;
577 }
578 
579 
580 char* Parser::GetArgument(int string_pos){
581  if(string_pos<index_num)
582  return(index_array[string_pos]->Term);
583  else return NULL;
584 }
585