A few days ago I was studying compiler principles and wrote a lexical analyzer as a learning exercise. The analyzer is fairly simple and still needs to be improved. The code is as follows:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Capacity of the token text buffer. Only used as a fallback when MAX has
 * not already been defined earlier in the translation unit.
 */
#ifndef MAX
#define MAX 256
#endif

/* Category codes written in front of every token in the output file. */
enum token_code {
    TOK_IF = 0,
    TOK_ELSE = 1,
    TOK_WHILE = 2,
    TOK_DO = 3,
    TOK_SEMICOLON = 4,
    TOK_LBRACE = 5,
    TOK_RBRACE = 6,
    TOK_LPAREN = 7,
    TOK_RPAREN = 8,
    TOK_PLUS = 9,
    TOK_MINUS = 10,
    TOK_STAR = 11,
    TOK_SLASH = 12,
    TOK_RELOP = 13,        /* ==, !=, >, >=, <, <= all share this code */
    TOK_IDENT = 14,
    TOK_NUMBER = 15,
    TOK_COLON = 16,
    TOK_ASSIGN = 17,
    TOK_DOUBLE_COLON = 18,
    TOK_AMPERSAND = 19,
    TOK_LBRACKET = 20,
    TOK_RBRACKET = 21
};

/* Returns non-zero when c is a decimal digit. */
static int is_digit_char(int c)
{
    return c >= '0' && c <= '9';
}

/* Returns non-zero when c is an ASCII letter. */
static int is_letter_char(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Returns non-zero when c separates tokens and produces no output. */
static int is_blank_char(int c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/*
 * Appends c to buf if there is still room for it plus the terminating NUL.
 * Characters that do not fit are dropped, so an over-long token is truncated
 * instead of overflowing the buffer.
 */
static void append_char(char *buf, size_t cap, size_t *len, int c)
{
    if (*len + 1 < cap) {
        buf[(*len)++] = (char)c;
    }
}

/* Writes one token line of the form (code,"text"). */
static void emit_token(FILE *out, int code, const char *text)
{
    fprintf(out, "(%d,\"%s\")\n", code, text);
}

/* Writes a single-character token line of the form (code,"c"). */
static void emit_char_token(FILE *out, int code, int c)
{
    char text[2] = { (char)c, '\0' };

    emit_token(out, code, text);
}

/* Writes one error line of the form error,"c" for an unrecognised character. */
static void emit_error(FILE *out, int c)
{
    fprintf(out, "error,\"%c\"\n", c);
}

/*
 * Scans a numeric constant whose first digit is c: digits, optionally
 * followed by '.' and further digits. Emits it with TOK_NUMBER and returns
 * the first character after the constant (possibly EOF).
 */
static int scan_number(FILE *in, FILE *out, int c, char *buf, size_t cap)
{
    size_t len = 0;

    while (is_digit_char(c)) {
        append_char(buf, cap, &len, c);
        c = fgetc(in);
    }
    if (c == '.') {
        /* The decimal point belongs to the constant even with no digits after it. */
        append_char(buf, cap, &len, c);
        c = fgetc(in);
        while (is_digit_char(c)) {
            append_char(buf, cap, &len, c);
            c = fgetc(in);
        }
    }
    buf[len] = '\0';
    emit_token(out, TOK_NUMBER, buf);
    return c;
}

/* Returns the keyword code for word, or TOK_IDENT when it is not a keyword. */
static int classify_word(const char *word)
{
    static const struct {
        const char *text;
        int code;
    } keywords[] = {
        { "if", TOK_IF },
        { "else", TOK_ELSE },
        { "while", TOK_WHILE },
        { "do", TOK_DO }
    };
    size_t k;

    for (k = 0; k < sizeof keywords / sizeof keywords[0]; k++) {
        if (strcmp(word, keywords[k].text) == 0) {
            return keywords[k].code;
        }
    }
    return TOK_IDENT;
}

/*
 * Scans a word whose first letter is c (letters and digits), emits it as a
 * keyword or identifier, and returns the first character after the word.
 */
static int scan_word(FILE *in, FILE *out, int c, char *buf, size_t cap)
{
    size_t len = 0;

    while (is_letter_char(c) || is_digit_char(c)) {
        append_char(buf, cap, &len, c);
        c = fgetc(in);
    }
    buf[len] = '\0';
    emit_token(out, classify_word(buf), buf);
    return c;
}

/*
 * Handles an operator that may be one or two characters long. `first` has
 * already been read. If the next character equals `second`, the pair is
 * emitted with pair_code; otherwise `first` alone is emitted with
 * single_code, or reported as an error when single_code is negative.
 * Returns the next unconsumed character.
 */
static int scan_operator_pair(FILE *in, FILE *out, int first, int second,
                              int pair_code, int single_code)
{
    char text[3] = { (char)first, '\0', '\0' };
    int next = fgetc(in);

    if (next == second) {
        text[1] = (char)second;
        emit_token(out, pair_code, text);
        return fgetc(in);
    }
    if (single_code >= 0) {
        emit_token(out, single_code, text);
    } else {
        emit_error(out, first);
    }
    return next;
}

/* Returns the code of a one-character token, or -1 if c is not one. */
static int single_char_code(int c)
{
    switch (c) {
    case ';': return TOK_SEMICOLON;
    case '{': return TOK_LBRACE;
    case '}': return TOK_RBRACE;
    case '(': return TOK_LPAREN;
    case ')': return TOK_RPAREN;
    case '+': return TOK_PLUS;
    case '-': return TOK_MINUS;
    case '*': return TOK_STAR;
    case '/': return TOK_SLASH;
    case '&': return TOK_AMPERSAND;
    case '[': return TOK_LBRACKET;
    case ']': return TOK_RBRACKET;
    default:  return -1;
    }
}

/*
 * Lexical analyzer entry point.
 *
 * Reads source text from "infile.txt" and writes one line per token to
 * "Outfile.txt" in the form (code,"text"). Scanning stops at the first '@'
 * or at end of file, whichever comes first. Characters that start no known
 * token are reported as error,"c" lines.
 *
 * Returns EXIT_SUCCESS when both files were processed without an I/O error,
 * EXIT_FAILURE otherwise.
 */
int main(void)
{
    char arr[MAX]; /* text of the number or word currently being scanned */
    int status = EXIT_SUCCESS;

    FILE *in = fopen("infile.txt", "r");
    if (in == NULL) {
        printf("can not open file\n");
        return EXIT_FAILURE;
    }

    FILE *out = fopen("Outfile.txt", "w");
    if (out == NULL) {
        printf("cannot open outfile\n");
        fclose(in);
        return EXIT_FAILURE;
    }

    /* Kept as int so EOF stays distinguishable from every valid character. */
    int c = fgetc(in);

    while (c != EOF && c != '@') {
        if (is_blank_char(c)) {
            c = fgetc(in);
        } else if (is_digit_char(c)) {
            c = scan_number(in, out, c, arr, sizeof arr);
        } else if (is_letter_char(c)) {
            c = scan_word(in, out, c, arr, sizeof arr);
        } else if (single_char_code(c) >= 0) {
            emit_char_token(out, single_char_code(c), c);
            c = fgetc(in);
        } else {
            switch (c) {
            case '=':
                c = scan_operator_pair(in, out, '=', '=', TOK_RELOP, TOK_ASSIGN);
                break;
            case ':':
                /*
                 * NOTE(review): the second character tested is ':' as in the
                 * original listing; confirm whether ":=" was intended.
                 */
                c = scan_operator_pair(in, out, ':', ':', TOK_DOUBLE_COLON, TOK_COLON);
                break;
            case '>':
                c = scan_operator_pair(in, out, '>', '=', TOK_RELOP, TOK_RELOP);
                break;
            case '<':
                c = scan_operator_pair(in, out, '<', '=', TOK_RELOP, TOK_RELOP);
                break;
            case '!':
                /* A '!' not followed by '=' is not a token: report it. */
                c = scan_operator_pair(in, out, '!', '=', TOK_RELOP, -1);
                break;
            default:
                emit_error(out, c);
                c = fgetc(in);
                break;
            }
        }
    }

    if (ferror(in) || ferror(out)) {
        status = EXIT_FAILURE;
    }
    fclose(in);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(out) != 0) {
        status = EXIT_FAILURE;
    }
    return status;
}
Note: Create a text file named infile.txt in the current directory yourself and write the program to be analyzed into it; the program text must end with the "@" marker. The results are written to a file named Outfile.txt in the same directory; this file is generated automatically. The table of token category codes is not given here, because it would be too much trouble to type out. La la la...
This program was debugged with Microsoft Visual Studio .NET on a Windows 2000 Server machine.
November 11, 2004 1:02:22