From: RUSVX2::Jnet%"A4422DAB@AWIUNI11" "Erich Neuwirth" 19-NOV-1990 01:18:52.20 To: Eberhard Mattes , 'Stefan Momma' , Harald Krieger , 'Barbara Burr' CC: Subj: Received: From DEARN(MAILER) by DS0RUS54 with Jnet id 1075 for ZRFN0370@DS0RUS54; Mon, 19 Nov 90 01:17 A Received: by DEARN (Mailer R2.07) id 0854; Mon, 19 Nov 90 01:16:15 CET Date: Mon, 19 Nov 90 01:05:22 MEZ Reply-To: German TeX Users Communication List Sender: German TeX Users Communication List From: Erich Neuwirth To: Eberhard Mattes , 'Stefan Momma' , Harald Krieger , 'Barbara Burr' ======================================================================== 491 Date: Sun, 18 Nov 90 23:27:16 WUT From: Gustaf Neumann Subject: Programm zur Konvertierung von Umlauten To: TEX-D-L@DEARN Nachstehend folgt ein Lex-Programm zur Konvertierung von Texten mit 'Ascii-German'-Umlauten (Umlaute werden als Ae, Oe, ... geschrieben) in Texte mit (TeX)-Umlauten. Das Programm ist bei weitem noch nicht perfekt (und wird es auch nie sein), doch ist vielleicht fuer einige doch nuetzlich. Ich konnte mit dem Programm den Text des Buches \bibitem[{Neu88}]{neumann88} G.~Neumann: \T{Metaprogrammierung und Prolog}, Addison--Wesley, Bonn 1988. zur Gaenze fehlerfrei umsetzen (Ende der Werbeeinschaltung). Bekannte Problemkinder sind "Masse" ("im hohen Masse" versus "Gesteinsmasse" ) und "Busse" ("Autobusse" vs. "tuet Busse"). In beiden Faellen wird die jeweils erste Variante als richtig angenommen, die anderen Alternativen erreicht man durch "Gesteinsmas{}se" bzw. durch "tuet Bu{}sse". Ich nehme gerne noch weitere Ausnahmeregeln entgegen. -Gustaf neumann ------------------------------------------------------------------- Gustaf Neumann neumann@dec4.wu-wien.ac.at, neumann@awiwuw11.bitnet Vienna University of Economics and Business Administration Augasse 2-6, A-1090 Vienna, Austria Tel: +43 (222) 31-336 x4533 Fax 347-555 ------------------------------------- cut here -----diac.shar----------- # This is a shell archive. 
Remove anything before this line, # then unpack it by saving it in a file and typing "sh file". # # Wrapped by neumann on Sun Nov 18 23:20:06 1990 # # This archive contains: # diac.l Makefile diacaux.c diacaux.h # LANG=""; export LANG echo x - diac.l cat >diac.l <<'@EOF' %{ /* diac.l * lex file for converting Ascii German into diacritical German * Version 1.0 written by * Dorai Sitaram, Rice University, 1990 dorai@titan.rice.edu * * Version 1.1: * General rewrite, using some material from * H.Kaeslin, Behandlung der Umlaute bei der Verarbeitung deutscher * Texte unter Unix, in: it, Vol 1, 1988 * and Duden - die Rechtschreibung. * * Gustaf Neumann, Wirtschaftsuniversitaet Wien, October 1990 * neumann@dec4.wu-wien.ac.at neumann@awiwuw11.bitnet * * The resulting LaTeX file uses german.sty! * Representation of umlaut characters: \"a \"A \"o \"O \"u \"U {\ss} * The style file german.sty would allow "a "A "o "O "u "U "s * as well, but the latter representation makes it impossible * to distinguish between umlaut characters and quoted text. This distinction * is necessary in cases where quotes should be changed into opening and * closing German quotes (\glqq and \grqq) in an automated way (another * lex program). * * If you do NOT want to use GERMAN.STY, replace the ruleset below * for \documentstyle with the following rule: \\documentstyle[^\}]*\} { printf("%s\n", yytext); printf("\\newskip\\zeeskip\n"); printf("\\zeeskip=0pt plus0pt minus0pt\n"); printf("\\def\\1{\\nobreak\\hskip\\zeeskip}\n"); printf("\\let\\umlaut\\\"\n"); printf("\\def\\\"#1{\\1\\umlaut#1\\1}\n"); printf("\\let\\oldss\\ss\n"); printf("\\def\\ss{\\1\\oldss\\1}\n"); } * * * To prevent the conversion from Ascii German into diacritical German, * it is necessary to insert empty groups into the words (e.g. Ka{}eslin). */ #include "diacaux.h" int i; %} %p 6500 %n 1000 %e 2500 %a 4000 %k 2500 %o 3500 V [AEIOUaeiou] C [B-DF-HJ-NP-TV-Zb-df-hj-np-tv-z] W [ "'\t\n,;!?().] 
b [ \t\n]
%%
%{
/* Rules are tried longest-match first; on a tie the earlier rule wins.
 * Most rules below are exceptions that ECHO a word unchanged so that the
 * generic rules at the very end ("[AaOoUu]e" and "ss") do not turn it
 * into an umlaut or a sharp s.
 */
%}
\\documentstyle{b}*\{	printf("\\documentstyle[german]{");
\\documentstyle{b}*\[.*german.*\]{b}*\{	ECHO;
\\documentstyle{b}*\[.*\]{b}*\{	{
		/* yytext[0..13] is "\documentstyle"; skip the optional
		 * blanks up to the '[' that the pattern guarantees, then
		 * re-emit everything after it behind the added option. */
		for (i = 14; yytext[i] != '['; i++)
			;
		printf("\\documentstyle[german,%s", &yytext[i + 1]);
	}
\\input{b}*\{[^\}]+\}	{
		/* convert the included file too and point \input at the copy */
		texfile = getfilenamebrack(&yytext[6]);
		tempfile = maketempfilename(texfile);
		printf("\\input{%s}", tempfile);
		dosubdiac(texfile, tempfile);
	}
\\input{b}*[^ \t\n]+	{
		texfile = getfilename(&yytext[6]);
		tempfile = maketempfilename(texfile);
		printf("\\input %s", tempfile);
		dosubdiac(texfile, tempfile);
	}
\\begin\{.+\}	ECHO;
\\end\{.+\}	ECHO;
\\[A-Za-z]+	ECHO;
%{
/* ue */
%}
[Rr]euessier	printf("%ce\\\"ussier", yytext[0]);
[^igGbB][Ee]ue	ECHO;
[QqAa]ue	ECHO;
[Uu]e[iu]	ECHO;
[Gg]etue{W}	ECHO;
[a-rt-z]tuend	ECHO;
{W}tuet{W}	ECHO;
[Nn]ichtstuend	ECHO;
[Nn]ichtstuer	ECHO;
Tuerei{W}	ECHO;
[a-z]tuerei	ECHO;
[a-z]tuerisch	ECHO;
[Aa]bzue[b-z][a-z]*[elr]n	ECHO;
[Aa]nzue[b-z][a-z]*[elr]n	ECHO;
[Aa]u[fs]zue[b-z][a-z]*[elr]n	ECHO;
[Ee]inzue[b-z][a-z]*[elr]n	ECHO;
[Hh]inzue[b-z][a-z]*[elr]n	ECHO;
[Mm]itzue[b-z][a-z]*[elr]n	ECHO;
[Nn]achzue[b-z][a-z]*[elr]n	ECHO;
[Vv]orzue[b-z][a-z]*[elr]n	ECHO;
[Ww]iederzue[b-z][a-z]*[elr]n	ECHO;
[Zz]ue[b-z][a-z]*[elr]n	ECHO;
[Zz]urueckzue[b-z][a-z]*[elr]n	printf("%cur\\\"uckzu%s",yytext[0],&yytext[9]);
tuendere	ECHO;
[Aa]biguen	ECHO;
[Aa]ffluen	ECHO;
[Bb]u[ea]nos	ECHO;
[Dd]uett	ECHO;
[Dd]uell	ECHO;
entuell	ECHO;
[Gg]raduell	ECHO;
[Gg]uerill	ECHO;
[Ii]ndividuen	ECHO;
[Ii]nfluen	ECHO;
Lueger	ECHO;
[krx]tuell	ECHO;
[Kk]ongruen	ECHO;
[Kk]onstituen	ECHO;
[Mm]enuett	ECHO;
[Mm]anuell	ECHO;
[Mm]igue[tl]	ECHO;
[Pp]irouett	ECHO;
[Pp]uerto	ECHO;
[Rr]esiduen	ECHO;
[Ss]tatue	ECHO;
[Ss]exuell	ECHO;
[Ss]uez	ECHO;
[Vv]enezuel	ECHO;
[Vv]isuell	ECHO;
[Zz]uerkannt	ECHO;
[Zz]uerteil	ECHO;
[Zz]uerst	ECHO;
%{
/* ae */
%}
[Aa]ero	ECHO;
[Dd]odekae	ECHO;
[Hh]exae	ECHO;
[Ii]kosae	ECHO;
[Ii]srael	ECHO;
[Kk]afkaesk	ECHO;
aeuel	printf("\\\"auel");
[Mm]ichael	ECHO;
[Mm]etae	ECHO;
[Oo]ctae	ECHO;
[Pp]entae	ECHO;
[Pp]harmae	ECHO;
[Rr]affael	ECHO;
[Rr]afael	ECHO;
[Rr]aphael	ECHO;
[Tt]etrae	ECHO;
[Tt]hemae	ECHO;
[Ss]chemae	ECHO;
[Ss]amuel	ECHO;
[Vv]alue{W}	ECHO;
[Tt]rue{W}	ECHO;
%{
/* oe */
%}
[Aa]utoe	ECHO;
[Bb]enzoe	ECHO;
[Cc]hemoe	ECHO;
[Dd]iarrhoea	ECHO;
[Ee]lektroe	ECHO;
[Gg]oethe	ECHO;
[Hh]eroen	ECHO;
[Hh]o[ml]oe	ECHO;
[Hh]ydroe	ECHO;
[Ii]ndoeuro	ECHO;
Joel	ECHO;
[Kk]inoe	ECHO;
[Kk]oedukat	ECHO;
[Kk]oeffizi	ECHO;
[Kk]oerzi	ECHO;
[Kk]oexist	ECHO;
[Cc]oexist	ECHO;
[Kk]oenzym	ECHO;
[Kk]ontoe	ECHO;
[Ss]oeben	ECHO;
Soest	ECHO;
[Mm]etazoe	ECHO;
[Mm][ai][ck]roe	ECHO;
[Mm]onoe	ECHO;
[Nn]euroe	ECHO;
[Oo]boe	ECHO;
[Oo]erlikon	ECHO;
[Oo]ldesloe	ECHO;
[Oo]kto	ECHO;
[Oo]pto	ECHO;
[Pp]oesie	ECHO;
[Pp]oebene	ECHO;
[Pp]iezo	ECHO;
[Pp]hoto	ECHO;
[Pp]hysioe	ECHO;
[Pp]oe[mt]i	ECHO;
[Pp]oe[mt][^a-z]	ECHO;
[Pp]orto	ECHO;
[Pp]roenzy	ECHO;
[Pp]roto	ECHO;
[Pp]rotozoe	ECHO;
[Pp]seudo	ECHO;
[Pp]sycho	ECHO;
[Pp]yro	ECHO;
[Rr]adio	ECHO;
[Tt]otoer	ECHO;
[Tt]urbo	ECHO;
[Vv]ideo	ECHO;
%{
/* ss */
%}
{V}sss	printf("%c{\\ss}s",yytext[0]);
[EeAu][iu]ss	printf("%c%c{\\ss}", yytext[0],yytext[1]);
{C}{V}sser{W}	ECHO;
{C}{V}sser{V}	ECHO;
{C}{V}ssen	ECHO;
[^r]uesse[ln]	printf("%c\\\"usse%c",yytext[0],yytext[6]);
luesse	printf("l\\\"usse");
iess	printf("ie{\\ss}");
ssung	ECHO;
ssel	ECHO;
ssoren	ECHO;
ssiez	ECHO;
ccess	ECHO;
ssidy	ECHO;
chss	ECHO;
ssch	ECHO;
sspr	ECHO;
ssier	ECHO;
nisse	ECHO;
lss	ECHO;
ss'	ECHO;
tionss	ECHO;
tss	ECHO;
ussisch	ECHO;
ungss	ECHO;
usserl{W}	ECHO;
[Aa]ssoz	ECHO;
[Aa]ssist	ECHO;
[Aa]ssemb	ECHO;
[Aa]uss[^e]	ECHO;
[Aa]usse[^rn]	ECHO;
[Aa]ussende	ECHO;
[Ee]sse	ECHO;
[Bb]isschen	printf("%ci{\\ss}chen", yytext[0]);
[Bb]usiness	ECHO;
[Bb]usse	ECHO;
[Bb]ussard	ECHO;
triebss	ECHO;
beitss	ECHO;
[Dd]iskussion	ECHO;
[Dd]issert	ECHO;
[Dd]asselb	ECHO;
[Ee]ssi	ECHO;
[Ff]lusse	ECHO;
[Ff]luess[ie]	printf("%cl\\\"uss%c", yytext[0],yytext[6]);
Grass	ECHO;
[Gg]enosse	ECHO;
[Gg]rosse	printf("%cro{\\ss}e",yytext[0]);
[Ii]nteress	ECHO;
[Kk]lass[ie]	ECHO;
[Kk]assette	ECHO;
[Ll]asse	ECHO;
[Ll]aessig	printf("%c\\\"assig", yytext[0]);
[Mm]assa[^nr]	ECHO;
[Mm]asseu	ECHO;
[Mm]isser{C}	printf("%ci{\\ss}er%c", yytext[0],yytext[6]);
[Mm]iss[ei]	ECHO;
[Ee]rmassen	printf("%crma{\\ss}en", yytext[0]);
[Mm]assi	ECHO;
[Pp]rivatissi	ECHO;
[Pp]assiv	ECHO;
[Pp]rozessor	ECHO;
[Ss]tossen	printf("%cto{\\ss}en", yytext[0]);
[Rr]essource	ECHO;
[Ww][ia]sse	ECHO;
{C}ss{C}	ECHO;
[AaOoUu]e	printf("\\\"%c", yytext[0]);
ss	printf("{\\ss}");
@EOF
chmod 644 diac.l
echo x - Makefile
cat >Makefile <<'@EOF'
#
# if you do not have flex available, deactivate the definitions of
# LEX and LEXLIB; The program compiled with flex works also with the
# standard lex library (-ll).
#
LEX=flex
LEXLIB=-lfl

PROGS= diac

all: ${PROGS}

# rm -f: depending on the compiler, the .o files may never be created,
# and a failing rm would make the whole build fail.
diac: diac.l diacaux.h diacaux.c
	${LEX} ${LFLAGS} diac.l
	cc -O ${DEFINES} -o $@ diacaux.c lex.yy.c ${LEXLIB}
	strip $@
	rm -f lex.yy.c lex.yy.o diacaux.o

# "./\#*" removes editor autosave files; a bare "#*" would start a shell
# comment and silently drop the rest of the command, including "core".
clean:
	rm -f ${PROGS} *.o *~ ./\#* core

shar:
	shar diac.l Makefile diacaux.c diacaux.h > diac.shar
@EOF
chmod 644 Makefile
echo x - diacaux.c
cat >diacaux.c <<'@EOF'
/* diacaux.c
 * to be linked with lex.yy.c from diac.l
 * written by Dorai Sitaram, Rice University, 1990
 *
 * Every string returned by strap(), getfilename(), getfilenamebrack()
 * and maketempfilename() is freshly allocated; the caller owns it.
 * Running out of memory terminates the program with an error message.
 */

#include "diacaux.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* The one and only definition of the globals declared in diacaux.h. */
char *texfile;
char *tempfile;

/* Directory that receives the converted copies of \input files. */
static const char tmp_prefix[] = "/tmp/";

/* Allocates n bytes or exits; never returns NULL. */
static char *xalloc(size_t n)
{
    char *p = malloc(n);

    if (p == NULL) {
        fprintf(stderr, "diac: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return p;
}

/* True for the characters the scanner treats as blanks (space, tab, newline). */
static int is_blank(int c)
{
    return c == ' ' || c == '\t' || c == '\n';
}

/* Returns s wrapped in single quotes for /bin/sh, with every embedded
 * single quote spelled '\'' so that the shell sees the name literally. */
static char *shell_quote(const char *s)
{
    size_t n = 2;               /* opening and closing quote */
    const char *p;
    char *r, *q;

    for (p = s; *p != '\0'; p++)
        n += (*p == '\'') ? 4 : 1;

    r = q = xalloc(n + 1);
    *q++ = '\'';
    for (p = s; *p != '\0'; p++) {
        if (*p == '\'') {
            memcpy(q, "'\\''", 4);
            q += 4;
        } else {
            *q++ = *p;
        }
    }
    *q++ = '\'';
    *q = '\0';
    return r;
}

/* Returns the length of s, not counting the terminating NUL. */
int slen(const char *s)
{
    return (int) strlen(s);
}

/* Returns the concatenation of s and t. */
char *strap(const char *s, const char *t)
{
    size_t ls = strlen(s);
    size_t lt = strlen(t);
    char *r = xalloc(ls + lt + 1);

    memcpy(r, s, ls);
    memcpy(r + ls, t, lt + 1);  /* copies the NUL as well */
    return r;
}

/* Returns a copy of s without its leading blanks
 * (the file name of a brace-less "\input name"). */
char *getfilename(const char *s)
{
    char *r;

    while (is_blank(*s))
        s++;
    r = xalloc(strlen(s) + 1);
    strcpy(r, s);
    return r;
}

/* Returns the text between the leading blanks/opening braces of s and the
 * first '}' (or the end of s if there is none): the file name of
 * "\input{name}". */
char *getfilenamebrack(const char *s)
{
    size_t n;
    char *r;

    while (is_blank(*s) || *s == '{')
        s++;
    n = strcspn(s, "}");
    r = xalloc(n + 1);
    memcpy(r, s, n);
    r[n] = '\0';
    return r;
}

/* Returns "/tmp/" followed by s with every '/' replaced by '_'.
 * NOTE(review): the name is predictable and lives in a world-writable
 * directory; consider mkstemp() if diac is run on untrusted systems. */
char *maketempfilename(const char *s)
{
    char *r = strap(tmp_prefix, s);
    char *p;

    /* flatten only the part that came from s, not the prefix */
    for (p = r + (sizeof tmp_prefix - 1); *p != '\0'; p++) {
        if (*p == '/')
            *p = '_';
    }
    return r;
}

/* Runs "diac <s > t" through the shell, i.e. converts file s into file t.
 * Both names are quoted because they come straight from the document
 * being converted.  A failing sub-conversion is reported on stderr but
 * does not stop the conversion of the enclosing file. */
void dosubdiac(const char *s, const char *t)
{
    char *in = shell_quote(s);
    char *out = shell_quote(t);
    /* sizeof counts the fixed text of the command plus the NUL */
    char *cmd = xalloc(strlen(in) + strlen(out) + sizeof "diac < > ");

    sprintf(cmd, "diac <%s > %s", in, out);
    if (system(cmd) != 0)
        fprintf(stderr, "diac: could not convert %s\n", s);

    free(cmd);
    free(out);
    free(in);
}
@EOF
chmod 644 diacaux.c
echo x - diacaux.h
cat >diacaux.h <<'@EOF'
/* diacaux.h
 * to be included in diac.l and diacaux.c
 * written by Dorai Sitaram, Rice University, 1990
 */
#ifndef DIACAUX_H
#define DIACAUX_H

/* Name of the file named by the current \input and of its converted copy;
 * set by the scanner in diac.l, defined in diacaux.c. */
extern char *texfile;
extern char *tempfile;

/* All char * results are freshly allocated and owned by the caller. */
int slen(const char *s);
char *strap(const char *s, const char *t);
char *getfilename(const char *s);
char *getfilenamebrack(const char *s);
char *maketempfilename(const char *s);
void dosubdiac(const char *s, const char *t);

#endif /* DIACAUX_H */
@EOF
chmod 644 diacaux.h
exit 0

ERICH NEUWIRTH
BITNET (EARN): A4422DAB@AWIUNI11
INTERNET: a4422dab@Helios.EDVZ.UniVie.AC.AT
Institute for Statistics and Computer Science
UNIVERSITY OF VIENNA, UNIVERSITAETSSTR. 5/9, A-1010 VIENNA, AUSTRIA