diff options
author | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
---|---|---|
committer | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
commit | e38d2351b83fa65c66ccde443777647ef5cb6cff (patch) | |
tree | 1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/translators/btparse | |
download | tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.tar.gz tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.zip |
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/translators/btparse')
39 files changed, 10811 insertions, 0 deletions
diff --git a/src/translators/btparse/Makefile.am b/src/translators/btparse/Makefile.am new file mode 100644 index 0000000..84af63b --- /dev/null +++ b/src/translators/btparse/Makefile.am @@ -0,0 +1,18 @@ +####### kdevelop will overwrite this part!!! (begin)########## +if !USE_LIBBTPARSE + +noinst_LIBRARIES = libbtparse.a + +AM_CPPFLAGS = $(all_includes) + +libbtparse_a_METASOURCES = AUTO + +libbtparse_a_SOURCES = bibtex_ast.c bibtex.c err.c ast.c scan.c util.c lex_auxiliary.c parse_auxiliary.c format_name.c string_util.c tex_tree.c names.c modify.c traversal.c sym.c macros.c error.c postprocess.c input.c init.c + +endif + +EXTRA_DIST = btparse.h init.c stdpccts.h attrib.h lex_auxiliary.h error.h parse_auxiliary.h prototypes.h tokens.h mode.h input.c postprocess.c error.c macros.c sym.h sym.c bt_debug.h traversal.c modify.c names.c my_alloca.h tex_tree.c string_util.c format_name.c antlr.h ast.h btconfig.h dlgdef.h parse_auxiliary.c lex_auxiliary.c util.c scan.c dlgauto.h ast.c err.h err.c bibtex.c bibtex_ast.c + +####### kdevelop will overwrite this part!!! (end)############ + +KDE_OPTIONS = noautodist diff --git a/src/translators/btparse/antlr.h b/src/translators/btparse/antlr.h new file mode 100644 index 0000000..f52aba6 --- /dev/null +++ b/src/translators/btparse/antlr.h @@ -0,0 +1,561 @@ +/* antlr.h + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. 
By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#ifndef ANTLR_H +#define ANTLR_H + +#include "btconfig.h" + +/* + * Define all of the stack setup and manipulation of $i, #i variables. + * + * Notes: + * The type 'Attrib' must be defined before entry into this .h file. + */ + +#include <stdlib.h> +#include <string.h> + +typedef int ANTLRTokenType; +typedef unsigned char SetWordType; + +typedef char ANTLRChar; + + /* G u e s s S t u f f */ + +#ifdef ZZCAN_GUESS +#ifndef ZZINF_LOOK +#define ZZINF_LOOK +#endif +#endif + +#ifdef ZZCAN_GUESS +typedef struct _zzjmp_buf { + jmp_buf state; + } zzjmp_buf; +#endif + + +/* can make this a power of 2 for more efficient lookup */ +#ifndef ZZLEXBUFSIZE +#define ZZLEXBUFSIZE 2000 +#endif + +#define zzOvfChk \ + if ( zzasp <= 0 ) \ + { \ + fprintf(stderr, zzStackOvfMsg, __FILE__, __LINE__); \ + exit(PCCTS_EXIT_FAILURE); \ + } + +#ifndef ZZA_STACKSIZE +#define ZZA_STACKSIZE 400 +#endif +#ifndef ZZAST_STACKSIZE +#define ZZAST_STACKSIZE 400 +#endif + +#ifndef zzfailed_pred +#define zzfailed_pred(_p) \ + fprintf(stderr, "semantic error; failed predicate: '%s'\n",_p) +#endif + +#ifdef LL_K +#define LOOKAHEAD \ + int zztokenLA[LL_K]; \ + char zztextLA[LL_K][ZZLEXBUFSIZE]; \ + int zzlap = 0, zzlabase=0; /* labase only used for DEMAND_LOOK */ +#else 
+#define LOOKAHEAD \ + int zztoken; +#endif + +#ifndef zzcr_ast +#define zzcr_ast(ast,attr,tok,text) +#endif + +#ifdef DEMAND_LOOK +#define DemandLookData int zzdirty=1; +#else +#define DemandLookData +#endif + + /* S t a t e S t u f f */ + +#ifdef ZZCAN_GUESS +#define zzGUESS_BLOCK zzantlr_state zzst; int zzrv; +#define zzGUESS zzsave_antlr_state(&zzst); \ + zzguessing = 1; \ + zzrv = setjmp(zzguess_start.state); +#define zzGUESS_FAIL longjmp(zzguess_start.state, 1) +#define zzGUESS_DONE zzrestore_antlr_state(&zzst); +#define zzNON_GUESS_MODE if ( !zzguessing ) +#define zzGuessData \ + zzjmp_buf zzguess_start; \ + int zzguessing; +#else +#define zzGUESS_BLOCK +#define zzGUESS +#define zzGUESS_FAIL +#define zzGUESS_DONE +#define zzNON_GUESS_MODE +#define zzGuessData +#endif + +typedef struct _zzantlr_state { +#ifdef ZZCAN_GUESS + zzjmp_buf guess_start; + int guessing; +#endif + int asp; + int ast_sp; +#ifdef ZZINF_LOOK + int inf_lap; /* not sure we need to save this one */ + int inf_labase; + int inf_last; +#endif +#ifdef DEMAND_LOOK + int dirty; +#endif + +#ifdef LL_K + int tokenLA[LL_K]; + char textLA[LL_K][ZZLEXBUFSIZE]; + int lap; + int labase; +#else + int token; + char text[ZZLEXBUFSIZE]; +#endif + } zzantlr_state; + + + /* I n f i n i t e L o o k a h e a d */ + + +#ifdef ZZINF_LOOK +#define InfLookData \ + int *zzinf_tokens; \ + char **zzinf_text; \ + char *zzinf_text_buffer; \ + int *zzinf_line; \ + int zzinf_labase; \ + int zzinf_last; +#else +#define InfLookData +#endif + +#ifdef ZZINF_LOOK + +#ifndef ZZINF_DEF_TEXT_BUFFER_SIZE +#define ZZINF_DEF_TEXT_BUFFER_SIZE 20000 +#endif +#ifndef ZZINF_DEF_TOKEN_BUFFER_SIZE +#define ZZINF_DEF_TOKEN_BUFFER_SIZE 2000 +#endif +/* WARNING!!!!!! + * ZZINF_BUFFER_TEXT_CHUNK_SIZE must be > sizeof(text) largest possible token. 
+ */ +#ifndef ZZINF_BUFFER_TEXT_CHUNK_SIZE +#define ZZINF_BUFFER_TEXT_CHUNK_SIZE 5000 +#endif +#ifndef ZZINF_BUFFER_TOKEN_CHUNK_SIZE +#define ZZINF_BUFFER_TOKEN_CHUNK_SIZE 1000 +#endif + +#if ZZLEXBUFSIZE > ZZINF_BUFFER_TEXT_CHUNK_SIZE +#define ZZINF_BUFFER_TEXT_CHUNK_SIZE ZZLEXBUFSIZE+5 +#endif + +/* make inf_look user-access macros */ +#ifdef LL_K +#define ZZINF_LA_VALID(i) (((zzinf_labase+i-1)-LL_K+1) <= zzinf_last) +#define ZZINF_LA(i) zzinf_tokens[(zzinf_labase+i-1)-LL_K+1] +#define ZZINF_LATEXT(i) zzinf_text[(zzinf_labase+i-1)-LL_K+1] +/* #define ZZINF_LINE(i) zzinf_line[(zzinf_labase+i-1)-LL_K+1]*/ +#else +#define ZZINF_LA_VALID(i) (((zzinf_labase+i-1)) <= zzinf_last) +#define ZZINF_LA(i) zzinf_tokens[(zzinf_labase+i-1)] +#define ZZINF_LATEXT(i) zzinf_text[(zzinf_labase+i-1)] +#endif + +#define inf_zzgettok _inf_zzgettok() +extern void _inf_zzgettok(); + +#endif /* ZZINF_LOOK */ + + +#ifdef LL_K + +#define ANTLR_INFO \ + Attrib zzempty_attr(void) {static Attrib a; return a;} \ + Attrib zzconstr_attr(int _tok, char *_text)\ + {Attrib a; zzcr_attr((&a),_tok,_text); return a;} \ + int zzasp=ZZA_STACKSIZE; \ + char zzStackOvfMsg[]="fatal: attrib/AST stack overflow %s(%d)!\n"; \ + Attrib zzaStack[ZZA_STACKSIZE]; DemandLookData \ + InfLookData \ + zzGuessData + +#else + +#define ANTLR_INFO \ + Attrib zzempty_attr(void) {static Attrib a; return a;} \ + Attrib zzconstr_attr(int _tok, char *_text)\ + {Attrib a; zzcr_attr((&a),_tok,_text); return a;} \ + int zzasp=ZZA_STACKSIZE; \ + char zzStackOvfMsg[]="fatal: attrib/AST stack overflow %s(%d)!\n"; \ + Attrib zzaStack[ZZA_STACKSIZE]; DemandLookData \ + InfLookData \ + zzGuessData + +#endif /* LL_k */ + + +#ifdef ZZINF_LOOK + +#ifdef LL_K +#ifdef DEMAND_LOOK +#define zzPrimeLookAhead {zzdirty=LL_K; zzlap = zzlabase = 0;} +#else +#define zzPrimeLookAhead {zzlap = zzlabase = 0; zzfill_inf_look();\ + {int _i; for(_i=1;_i<=LL_K; _i++) \ + {zzCONSUME;} zzlap = zzlabase = 0;}} +#endif + +#else /* LL_K */ + +#ifdef 
DEMAND_LOOK +#define zzPrimeLookAhead zzfill_inf_look(); zzdirty=1 +#else +#define zzPrimeLookAhead zzfill_inf_look(); inf_zzgettok + +#endif +#endif /* LL_K */ + +#else /* ZZINF_LOOK */ + +#ifdef LL_K +#ifdef DEMAND_LOOK +#define zzPrimeLookAhead {zzdirty=LL_K; zzlap = zzlabase = 0;} +#else +#define zzPrimeLookAhead {int _i; zzlap = 0; for(_i=1;_i<=LL_K; _i++) \ + {zzCONSUME;} zzlap = 0;} +#endif + +#else + +#ifdef DEMAND_LOOK +#define zzPrimeLookAhead zzdirty=1 +#else +#define zzPrimeLookAhead zzgettok() +#endif +#endif /* LL_K */ + +#endif /* ZZINF_LOOK */ + + +#ifdef LL_K +#define zzenterANTLRs(s) \ + zzlextext = &(zztextLA[0][0]); zzrdstr( s ); zzPrimeLookAhead; +#define zzenterANTLRf(f) \ + zzlextext = &(zztextLA[0][0]); zzrdfunc( f ); zzPrimeLookAhead; +#define zzenterANTLR(f) \ + zzlextext = &(zztextLA[0][0]); zzrdstream( f ); zzPrimeLookAhead; +#ifdef ZZINF_LOOK +#define zzleaveANTLR(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRf(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRs(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#else +#define zzleaveANTLR(f) +#define zzleaveANTLRf(f) +#define zzleaveANTLRs(f) +#endif + +#else + +#define zzenterANTLRs(s) \ + {static char zztoktext[ZZLEXBUFSIZE]; \ + zzlextext = zztoktext; zzrdstr( s ); zzPrimeLookAhead;} +#define zzenterANTLRf(f) \ + {static char zztoktext[ZZLEXBUFSIZE]; \ + zzlextext = zztoktext; zzrdfunc( f ); zzPrimeLookAhead;} +#define zzenterANTLR(f) \ + {static char zztoktext[ZZLEXBUFSIZE]; \ + zzlextext = zztoktext; zzrdstream( f ); zzPrimeLookAhead;} +#ifdef ZZINF_LOOK +#define zzleaveANTLR(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRf(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRs(f) free(zzinf_text_buffer); free(zzinf_text); 
free(zzinf_tokens); free(zzinf_line); +#else +#define zzleaveANTLR(f) +#define zzleaveANTLRf(f) +#define zzleaveANTLRs(f) +#endif + +#endif + +#define ANTLR(st, f) zzbufsize = ZZLEXBUFSIZE; \ + zzenterANTLR(f); \ + st; ++zzasp; \ + zzleaveANTLR(f); + +#define ANTLRm(st, f, _m) zzbufsize = ZZLEXBUFSIZE; \ + zzmode(_m); \ + zzenterANTLR(f); \ + st; ++zzasp; \ + zzleaveANTLR(f); + +#define ANTLRf(st, f) zzbufsize = ZZLEXBUFSIZE; \ + zzenterANTLRf(f); \ + st; ++zzasp; \ + zzleaveANTLRf(f); + +#define ANTLRs(st, s) zzbufsize = ZZLEXBUFSIZE; \ + zzenterANTLRs(s); \ + st; ++zzasp; \ + zzleaveANTLRs(s); + +#ifdef LL_K +#define zztext (&(zztextLA[zzlap][0])) +#else +#define zztext zzlextext +#endif + + + /* A r g u m e n t A c c e s s */ + +#define zzaCur (zzaStack[zzasp]) +#define zzaRet (*zzaRetPtr) +#define zzaArg(v,n) zzaStack[v-n] +#define zzMakeAttr { zzNON_GUESS_MODE {zzOvfChk; --zzasp; zzcr_attr(&(zzaStack[zzasp]),LA(1),LATEXT(1));}} +#ifdef zzdef0 +#define zzMake0 { zzOvfChk; --zzasp; zzdef0(&(zzaStack[zzasp]));} +#else +#define zzMake0 { zzOvfChk; --zzasp;} +#endif +#define zzaPush(_v) { zzOvfChk; zzaStack[--zzasp] = _v;} +#ifndef zzd_attr +#define zzREL(t) zzasp=(t); /* Restore state of stack */ +#else +#define zzREL(t) for (; zzasp<(t); zzasp++) \ + { zzd_attr(&(zzaStack[zzasp])); } +#endif + +#define zzsetmatch(_es) \ + if ( !_zzsetmatch(_es, &zzBadText, &zzMissText, &zzMissTok, &zzBadTok, &zzMissSet) ) goto fail; +#define zzsetmatch_wsig(_es, handler) \ + if ( !_zzsetmatch_wsig(_es) ) {_signal=MismatchedToken; goto handler;} + +extern int _zzsetmatch(SetWordType *, char **, char **, int *, int *, SetWordType **); +extern int _zzsetmatch_wsig(SetWordType *); + +#define zzmatch(_t) \ + if ( !_zzmatch(_t, &zzBadText, &zzMissText, &zzMissTok, &zzBadTok, &zzMissSet) ) goto fail; +#define zzmatch_wsig(_t,handler) \ + if ( !_zzmatch_wsig(_t) ) {_signal=MismatchedToken; goto handler;} + +extern int _zzmatch(int, const char **, const char **, int *, int *, SetWordType 
**); +extern int _zzmatch_wsig(int); + +#define zzmatch_wdfltsig(_t,_f) \ + if ( !_zzmatch_wdfltsig(_t,_f) ) _signal=MismatchedToken; +#define zzsetmatch_wdfltsig(tw,tt,wf) \ + if ( !_zzsetmatch_wdfltsig(tw,tt,wf) ) _signal=MismatchedToken; + +extern int _zzmatch_wdfltsig(int, SetWordType *); +extern int _zzsetmatch_wdfltsig(SetWordType *tokensWanted, + int tokenTypeOfSet, + SetWordType *whatFollows); + +#ifdef GENAST +#define zzRULE Attrib *zzaRetPtr = &(zzaStack[zzasp-1]); \ + SetWordType *zzMissSet=NULL; int zzMissTok=0; \ + int zzBadTok=0; const char *zzBadText=""; \ + int zzErrk=1; \ + const char *zzMissText=""; zzASTVars +#else +#define zzRULE Attrib *zzaRetPtr = &(zzaStack[zzasp-1]); \ + int zzBadTok=0; const char *zzBadText=""; \ + int zzErrk=1; \ + SetWordType *zzMissSet=NULL; int zzMissTok=0; const char *zzMissText="" +#endif + +#ifdef GENAST +#define zzBLOCK(i) int i = zzasp - 1; int zztsp = zzast_sp +#define zzEXIT(i) zzREL(i); zzastREL; zzNON_GUESS_MODE { zzastPush(*_root); } +#define zzLOOP(i) zzREL(i); zzastREL +#else +#define zzBLOCK(i) int i = zzasp - 1 +#define zzEXIT(i) zzREL(i) +#define zzLOOP(i) zzREL(i) +#endif + +#ifdef LL_K + +#ifdef DEMAND_LOOK +#define LOOK(_k) {int i,stop=_k-(LL_K-zzdirty); for (i=1; i<=stop; i++) \ + zzCONSUME;} +#define zzCONSUME {zzgettok(); zzdirty--; \ + zzlap = (zzlap+1)&(LL_K-1); \ + zzlextext = &(zztextLA[zzlap][0]);} +#else +#ifdef ZZINF_LOOK +#define zzCONSUME {inf_zzgettok; \ + zzlap = (zzlap+1)&(LL_K-1); \ + zzlextext = &(zztextLA[zzlap][0]); \ + } +#else +#define zzCONSUME {zzgettok(); \ + zzlap = (zzlap+1)&(LL_K-1); \ + zzlextext = &(zztextLA[zzlap][0]);} +#endif /* ZZINF_LOOK */ +#endif /* DEMAND_LOOK */ + +#else /* LL_K */ + +#ifdef DEMAND_LOOK +#define LOOK(_k) if ( zzdirty) zzCONSUME; +#ifdef ZZINF_LOOK +#define zzCONSUME inf_zzgettok; zzdirty=0; +#else +#define zzCONSUME zzgettok(); zzdirty=0; +#endif /* ZZINF_LOOK */ + +#else /* DEMAND_LOOK */ + +#ifdef ZZINF_LOOK +#define zzCONSUME inf_zzgettok +#else 
+#define zzCONSUME zzgettok(); +#endif + +#endif /* DEMAND_LOOK */ + +#endif /* LL_K */ + +#ifdef LL_K +#define NLA zztokenLA[zzlap&(LL_K-1)] /* --> next LA */ +#define NLATEXT zztextLA[zzlap&(LL_K-1)] /* --> next text of LA */ +#ifdef DEMAND_LOOK +#define LA(i) zztokenLA[(zzlabase+(i)-1)&(LL_K-1)] +#define LATEXT(i) (&(zztextLA[(zzlabase+(i)-1)&(LL_K-1)][0])) +#else +#define LA(i) zztokenLA[(zzlap+(i)-1)&(LL_K-1)] +#define LATEXT(i) (&(zztextLA[(zzlap+(i)-1)&(LL_K-1)][0])) +#endif +#else +#define NLA zztoken +#define NLATEXT zztext +#define LA(i) zztoken +#define LATEXT(i) zztext +#endif + + + /* S t a n d a r d S i g n a l s */ + +#define NoSignal 0 +#define MismatchedToken 1 +#define NoViableAlt 2 +#define NoSemViableAlt 3 + + + /* F u n c t i o n T r a c i n g */ + +#ifndef zzTRACEIN +#define zzTRACEIN(r) fprintf(stderr, "enter rule \"%s\"\n", r); +#endif +#ifndef zzTRACEOUT +#define zzTRACEOUT(r) fprintf(stderr, "exit rule \"%s\"\n", r); +#endif + +#ifdef ZZWCHAR_T +#define zzchar_t unsigned wchar_t +#else +#define zzchar_t unsigned char +#endif + + /* E x t e r n D e f s */ + +extern Attrib zzempty_attr(void); +extern Attrib zzconstr_attr(int, char *); +extern void zzsyn(const char *, int, char *, SetWordType *, int, int, const char *); +extern int zzset_el(unsigned, SetWordType *); +extern int zzset_deg(SetWordType *); +extern void zzedecode(SetWordType *); +extern void zzFAIL(int k, ...); +extern void zzresynch(SetWordType *, SetWordType); +extern void zzsave_antlr_state(zzantlr_state *); +extern void zzrestore_antlr_state(zzantlr_state *); +extern void zzfill_inf_look(void); +#ifdef EXCEPTION_HANDLING +extern void zzdflthandlers(int, int *); +#endif + + /* G l o b a l V a r i a b l e s */ + +/* Define a parser; user should do a "#parser myname" in their grammar file */ +/*extern struct pccts_parser zzparser;*/ + +extern const char *zztokens[]; +#ifdef LL_K +extern int zztokenLA[]; +extern char zztextLA[][ZZLEXBUFSIZE]; +extern int zzlap; +extern int 
zzlabase; +#else +extern int zztoken; +#endif + +extern char zzStackOvfMsg[]; +extern int zzasp; +extern Attrib zzaStack[]; +#ifdef ZZINF_LOOK +extern int *zzinf_tokens; +extern char **zzinf_text; +extern char *zzinf_text_buffer; +extern int *zzinf_line; +extern int zzinf_labase; +extern int zzinf_last; +#endif +#ifdef DEMAND_LOOK +extern int zzdirty; +#endif +#ifdef ZZCAN_GUESS +extern int zzguessing; +extern zzjmp_buf zzguess_start; +#endif + +/* Define global veriables that refer to values exported by the scanner. + * These declarations duplicate those in dlgdef.h, but are needed + * if ANTLR is not to generate a .dlg file (-gx); PS, this is a hack. + */ +extern zzchar_t *zzlextext; /* text of most recently matched token */ +extern int zzbufsize; /* how long zzlextext is */ + +#endif diff --git a/src/translators/btparse/ast.c b/src/translators/btparse/ast.c new file mode 100644 index 0000000..d433f79 --- /dev/null +++ b/src/translators/btparse/ast.c @@ -0,0 +1,227 @@ +/* Abstract syntax tree manipulation functions + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. 
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "ast.h"
+#include "attrib.h"
+#include "antlr.h"
+
+/* ensure that tree manipulation variables are current after a rule
+ * reference
+ */
+void
+zzlink(AST **_root, AST **_sibling, AST **_tail)
+{
+    if ( *_sibling == NULL ) return;
+    if ( *_root == NULL ) *_root = *_sibling;
+    else if ( *_root != *_sibling ) (*_root)->down = *_sibling;
+    if ( *_tail==NULL ) *_tail = *_sibling;
+    /* advance the tail to the last node of the sibling list */
+    while ( (*_tail)->right != NULL ) *_tail = (*_tail)->right;
+}
+
+/* allocate a zero-filled AST node.  On allocation failure a message is
+ * written to stderr and NULL is returned.
+ */
+AST *
+zzastnew(void)
+{
+    AST *p = (AST *) calloc(1, sizeof(AST));
+    if ( p == NULL ) fprintf(stderr,"%s(%d): cannot allocate AST node\n",__FILE__,__LINE__);
+    return p;
+}
+
+/* add a child node to the current sibling list */
+void
+zzsubchild(AST **_root, AST **_sibling, AST **_tail)
+{
+    AST *n;
+    zzNON_GUESS_MODE {
+    n = zzastnew();
+#ifdef DEMAND_LOOK
+    zzcr_ast(n, &(zzaCur), LA(0), LATEXT(0));
+#else
+    zzcr_ast(n, &(zzaCur), LA(1), LATEXT(1));
+#endif
+    zzastPush( n );
+    if ( *_tail != NULL ) (*_tail)->right = n;
+    else {
+        *_sibling = n;
+        if ( *_root != NULL ) (*_root)->down = *_sibling;
+    }
+    *_tail = n;
+    if ( *_root == NULL ) *_root = *_sibling;
+    }
+}
+
+/* make a new AST node.  Make the newly-created
+ * node the root for the current sibling list.  If a root node already
+ * exists, make the newly-created node the root of the current root.
+ */
+void
+zzsubroot(AST **_root, AST **_sibling, AST **_tail)
+{
+    AST *n;
+    zzNON_GUESS_MODE {
+    n = zzastnew();
+#ifdef DEMAND_LOOK
+    zzcr_ast(n, &(zzaCur), LA(0), LATEXT(0));
+#else
+    zzcr_ast(n, &(zzaCur), LA(1), LATEXT(1));
+#endif
+    zzastPush( n );
+    if ( *_root != NULL )
+        if ( (*_root)->down == *_sibling ) *_sibling = *_tail = *_root;
+    *_root = n;
+    (*_root)->down = *_sibling;
+    }
+}
+
+/* Apply function to root then each sibling
+ * example: print tree in child-sibling LISP-format (AST has token field)
+ *
+ * void show(tree)
+ * AST *tree;
+ * {
+ * if ( tree == NULL ) return;
+ * printf(" %s", zztokens[tree->token]);
+ * }
+ *
+ * void before() { printf(" ("); }
+ * void after() { printf(" )"); }
+ *
+ * LISPdump() { zzpre_ast(tree, show, before, after); }
+ *
+ */
+void
+zzpre_ast(
+    AST *tree,
+    void (*func)(AST *), /* apply this to each tree node */
+    void (*before)(AST *), /* apply this to root of subtree before preordering it */
+    void (*after)(AST *)) /* apply this to root of subtree after preordering it */
+{
+    while ( tree!= NULL )
+    {
+        if ( tree->down != NULL ) (*before)(tree);
+        (*func)(tree);
+        zzpre_ast(tree->down, func, before, after);
+        if ( tree->down != NULL ) (*after)(tree);
+        tree = tree->right;
+    }
+}
+
+/* free all AST nodes in tree (children, then right siblings, then the
+ * node itself); each node is released through zztfree()
+ */
+void
+zzfree_ast(AST *tree)
+{
+    if ( tree == NULL ) return;
+    zzfree_ast( tree->down );
+    zzfree_ast( tree->right );
+    zztfree( tree );
+}
+
+/* build a tree (root child1 child2 ... NULL)
+ * If root is NULL, simply make the children siblings and return ptr
+ * to 1st sibling (child1). If root is not single node, return NULL.
+ *
+ * Siblings that are actually sibling lists themselves are handled
+ * correctly. For example #( NULL, #( NULL, A, B, C), D) results
+ * in the tree ( NULL A B C D ).
+ *
+ * Requires at least two parameters with the last one being NULL. If
+ * both are NULL, return NULL.
+ */
+AST *zztmake(AST *rt, ...)
+{
+    va_list ap;
+    register AST *child, *sibling=NULL, *tail=NULL, *w;
+    AST *root;
+
+    va_start(ap, rt);
+    root = rt;
+
+    /* every va_start must be paired with va_end, including on this
+     * early rejection of a root that already has children
+     */
+    if ( root != NULL )
+        if ( root->down != NULL ) { va_end(ap); return NULL; }
+    child = va_arg(ap, AST *);
+    while ( child != NULL )
+    {
+        for (w=child; w->right!=NULL; w=w->right) {;} /* find end of child */
+        if ( sibling == NULL ) {sibling = child; tail = w;}
+        else {tail->right = child; tail = w;}
+        child = va_arg(ap, AST *);
+    }
+    if ( root==NULL ) root = sibling;
+    else root->down = sibling;
+    va_end(ap);
+    return root;
+}
+
+/* tree duplicate: copy node t, its right siblings and its children.
+ * Returns NULL if t is NULL or if a node cannot be allocated.
+ */
+AST *
+zzdup_ast(AST *t)
+{
+    AST *u;
+
+    if ( t == NULL ) return NULL;
+    u = zzastnew();
+    if ( u == NULL ) return NULL; /* zzastnew already reported the failure */
+    *u = *t;
+#ifdef zzAST_DOUBLE
+    u->up = NULL; /* set by calling invocation */
+    u->left = NULL;
+#endif
+    u->right = zzdup_ast(t->right);
+    u->down = zzdup_ast(t->down);
+#ifdef zzAST_DOUBLE
+    if ( u->right!=NULL ) u->right->left = u;
+    if ( u->down!=NULL ) u->down->up = u;
+#endif
+    return u;
+}
+
+/* free a single node, running the optional zzd_ast hook first */
+void
+zztfree(AST *t)
+{
+#ifdef zzd_ast
+    zzd_ast( t );
+#endif
+    free( t );
+}
+
+#ifdef zzAST_DOUBLE
+/*
+ * Set the 'up', and 'left' pointers of all nodes in 't'.
+ * Initial call is double_link(your_tree, NULL, NULL).
+ */
+void
+zzdouble_link(AST *t, AST *left, AST *up)
+{
+    if ( t==NULL ) return;
+    t->left = left;
+    t->up = up;
+    zzdouble_link(t->down, NULL, t);
+    zzdouble_link(t->right, t, up);
+}
+#endif
diff --git a/src/translators/btparse/ast.h b/src/translators/btparse/ast.h
new file mode 100644
index 0000000..59622ec
--- /dev/null
+++ b/src/translators/btparse/ast.h
@@ -0,0 +1,99 @@
+/* Abstract syntax tree
+ *
+ * Macros, definitions
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ZZAST_H +#define ZZAST_H + +#define zzastOvfChk \ + if ( zzast_sp <= 0 ) \ + { \ + fprintf(stderr, zzStackOvfMsg, __FILE__, __LINE__); \ + exit(PCCTS_EXIT_FAILURE); \ + } + +#ifndef USER_DEFINED_AST +#ifndef AST_FIELDS +#define AST_FIELDS +#endif + +typedef struct _ast { + struct _ast *right, *down; +#ifdef zzAST_DOUBLE + struct _ast *left, *up; +#endif + AST_FIELDS +} AST; + +#else + +#ifdef zzAST_DOUBLE +#define AST_REQUIRED_FIELDS struct _ast *right, *down, *left, *up; +#else +#define AST_REQUIRED_FIELDS struct _ast *right, *down; +#endif + +#endif + + +/* N o d e a c c e s s m a c r o s */ +#define zzchild(t) (((t)==NULL)?NULL:(t->down)) +#define zzsibling(t) (((t)==NULL)?NULL:(t->right)) + + +/* define global variables needed by #i stack */ +#define zzASTgvars \ + AST *zzastStack[ZZAST_STACKSIZE]; \ + int zzast_sp = ZZAST_STACKSIZE; + +#define zzASTVars AST *_ast = NULL, *_sibling = NULL, *_tail = NULL +#define zzSTR ( (_tail==NULL)?(&_sibling):(&(_tail->right)) ) +#define zzastCur (zzastStack[zzast_sp]) +#define zzastArg(i) (zzastStack[zztsp-i]) +#define zzastPush(p) zzastOvfChk; 
zzastStack[--zzast_sp] = p; +#define zzastDPush --zzast_sp +#define zzastMARK zztsp=zzast_sp; /* Save state of stack */ +#define zzastREL zzast_sp=zztsp; /* Return state of stack */ +#define zzrm_ast {zzfree_ast(*_root); _tail = _sibling = (*_root)=NULL;} + +extern int zzast_sp; +extern AST *zzastStack[]; + +void zzlink(AST **, AST **, AST **); +void zzsubchild(AST **, AST **, AST **); +void zzsubroot(AST **, AST **, AST **); +void zzpre_ast(AST *, void (*)(), void (*)(), void (*)()); +void zzfree_ast(AST *); +AST *zztmake(AST *, ...); +AST *zzdup_ast(AST *); +void zztfree(AST *); +void zzdouble_link(AST *, AST *, AST *); +AST *zzastnew(void); +#endif diff --git a/src/translators/btparse/attrib.h b/src/translators/btparse/attrib.h new file mode 100644 index 0000000..6a3cecf --- /dev/null +++ b/src/translators/btparse/attrib.h @@ -0,0 +1,35 @@ +/* ------------------------------------------------------------------------ +@NAME : attrib.h +@DESCRIPTION: Definition of the Attrib type needed by the PCCTS- + generated parser. +@CREATED : Summer 1996, Greg Ward +@MODIFIED : +@VERSION : $Id: attrib.h,v 1.3 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +#ifndef ATTRIB_H +#define ATTRIB_H + +/* + * Defining Attrib this way (as opposed to making it a pointer to a struct) + * avoid the expense of allocating/deallocating a structure for each token; + * this way, PCCTS statically allocates the whole stack once and that's + * it. (Of course, the stack is four times bigger than it would have been + * otherwise.) 
+ */ + +typedef struct { + int line; + int offset; + int token; + char *text; +} Attrib; + +#endif /* ATTRIB_H */ diff --git a/src/translators/btparse/bibtex.c b/src/translators/btparse/bibtex.c new file mode 100644 index 0000000..c922803 --- /dev/null +++ b/src/translators/btparse/bibtex.c @@ -0,0 +1,312 @@ +/* + * A n t l r T r a n s l a t i o n H e a d e r + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btconfig.h" +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +#include "parse_auxiliary.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#define GENAST + +#include "ast.h" + +#define zzSET_SIZE 4 +#include "antlr.h" +#include "tokens.h" +#include "dlgdef.h" +#include "mode.h" +#ifndef PURIFY +#define PURIFY(r,s) +#endif +#include "ast.c" +zzASTgvars + +ANTLR_INFO + +void +bibfile(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + AST *last; (*_root) = NULL; + { + zzBLOCK(zztasp2); + zzMake0; + { + while ( (LA(1)==AT) ) { + _ast = NULL; entry(&_ast); + /* a little creative forestry... 
*/ + if ((*_root) == NULL) + (*_root) = zzastArg(1); + else + last->right = zzastArg(1); + last = zzastArg(1); + zzLOOP(zztasp2); + } + zzEXIT(zztasp2); + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x1); + } +} + +void +entry(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + bt_metatype metatype; + zzmatch(AT); zzCONSUME; + zzmatch(NAME); zzsubroot(_root, &_sibling, &_tail); + + metatype = entry_metatype(); + zzastArg(1)->nodetype = BTAST_ENTRY; + zzastArg(1)->metatype = metatype; + zzCONSUME; + + body(zzSTR, metatype ); zzlink(_root, &_sibling, &_tail); + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x2); + } +} + +void +body(AST**_root, bt_metatype metatype ) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (LA(1)==STRING) ) { + if (!(metatype == BTE_COMMENT )) {zzfailed_pred(" metatype == BTE_COMMENT ");} + zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_STRING; + zzCONSUME; + + } + else { + if ( (LA(1)==ENTRY_OPEN) ) { + zzmatch(ENTRY_OPEN); zzCONSUME; + contents(zzSTR, metatype ); zzlink(_root, &_sibling, &_tail); + zzmatch(ENTRY_CLOSE); zzCONSUME; + } + else {zzFAIL(1,zzerr1,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x4); + } +} + +void +contents(AST**_root, bt_metatype metatype ) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (setwd1[LA(1)]&0x8)&&(metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ ) ) { + if (!(metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ )) {zzfailed_pred(" metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ ");} + { + 
zzBLOCK(zztasp2); + zzMake0; + { + if ( (LA(1)==NAME) ) { + zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail); zzCONSUME; + } + else { + if ( (LA(1)==NUMBER) ) { + zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail); zzCONSUME; + } + else {zzFAIL(1,zzerr2,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + zzEXIT(zztasp2); + } + } + zzastArg(1)->nodetype = BTAST_KEY; + zzmatch(COMMA); zzCONSUME; + fields(zzSTR); zzlink(_root, &_sibling, &_tail); + } + else { + if ( (setwd1[LA(1)]&0x10)&&(metatype == BTE_MACRODEF ) ) { + if (!(metatype == BTE_MACRODEF )) {zzfailed_pred(" metatype == BTE_MACRODEF ");} + fields(zzSTR); zzlink(_root, &_sibling, &_tail); + } + else { + if ( (setwd1[LA(1)]&0x20)&&(metatype == BTE_PREAMBLE ) ) { + if (!(metatype == BTE_PREAMBLE )) {zzfailed_pred(" metatype == BTE_PREAMBLE ");} + value(zzSTR); zzlink(_root, &_sibling, &_tail); + } + else {zzFAIL(1,zzerr3,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x40); + } +} + +void +fields(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (LA(1)==NAME) ) { + field(zzSTR); zzlink(_root, &_sibling, &_tail); + { + zzBLOCK(zztasp2); + zzMake0; + { + if ( (LA(1)==COMMA) ) { + zzmatch(COMMA); zzCONSUME; + fields(zzSTR); zzlink(_root, &_sibling, &_tail); + } + zzEXIT(zztasp2); + } + } + } + else { + if ( (LA(1)==ENTRY_CLOSE) ) { + } + else {zzFAIL(1,zzerr4,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x80); + } +} + +void +field(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + zzmatch(NAME); zzsubroot(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_FIELD; 
check_field_name (zzastArg(1)); + zzCONSUME; + + zzmatch(EQUALS); zzCONSUME; + value(zzSTR); zzlink(_root, &_sibling, &_tail); + +#if DEBUG > 1 + printf ("field: fieldname = %p (%s)\n" + " first val = %p (%s)\n", + zzastArg(1)->text, zzastArg(1)->text, zzastArg(2)->text, zzastArg(2)->text); +#endif + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd2, 0x1); + } +} + +void +value(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + simple_value(zzSTR); zzlink(_root, &_sibling, &_tail); + { + zzBLOCK(zztasp2); + zzMake0; + { + while ( (LA(1)==HASH) ) { + zzmatch(HASH); zzCONSUME; + simple_value(zzSTR); zzlink(_root, &_sibling, &_tail); + zzLOOP(zztasp2); + } + zzEXIT(zztasp2); + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd2, 0x2); + } +} + +void +simple_value(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (LA(1)==STRING) ) { + zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_STRING; + zzCONSUME; + + } + else { + if ( (LA(1)==NUMBER) ) { + zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_NUMBER; + zzCONSUME; + + } + else { + if ( (LA(1)==NAME) ) { + zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_MACRO; + zzCONSUME; + + } + else {zzFAIL(1,zzerr5,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd2, 0x4); + } +} diff --git a/src/translators/btparse/bibtex_ast.c b/src/translators/btparse/bibtex_ast.c new file mode 100644 index 0000000..354cefb --- /dev/null +++ b/src/translators/btparse/bibtex_ast.c @@ -0,0 +1,63 @@ +/* 
------------------------------------------------------------------------ +@NAME : bibtex_ast.c +@DESCRIPTION: Data and functions for internal display/manipulation of AST + nodes. (Stuff for external consumption, and for processing + whole trees, is to be found in traversal.c.) +@GLOBALS : +@CREATED : 1997/08/12, Greg Ward +@MODIFIED : +@VERSION : $Id: bibtex_ast.c,v 1.6 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include "btparse.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + + +const char *nodetype_names[] = +{ + "bogus", "entry", "key", "field", "string", "number", "macro" +}; + + +static void dump (AST *root, int depth) +{ + AST *cur; + + if (root == NULL) + { + printf ("[empty]\n"); + return; + } + + cur = root; + while (cur != NULL) + { + printf ("%*s[%s]: ", 2*depth, "", nodetype_names[cur->nodetype]); + if (cur->text != NULL) + printf ("(%s)\n", cur->text); + else + printf ("(null)\n"); + + if (cur->down != NULL) + dump (cur->down, depth+1); + cur = cur->right; + } +} + + +/* Print msg (if non-NULL) verbatim, then dump the nodes reachable from root, then a newline. */ +void dump_ast (char *msg, AST *root) +{ + if (msg != NULL) + printf ("%s", msg); /* msg is data: never use it as the format string */ + dump (root, 0); + printf ("\n"); +} diff --git a/src/translators/btparse/bt_debug.h b/src/translators/btparse/bt_debug.h new file mode 100644 index 0000000..913ae1a --- /dev/null +++ b/src/translators/btparse/bt_debug.h @@ -0,0 +1,38 @@ +/* ------------------------------------------------------------------------ +@NAME : bt_debug.h +@DESCRIPTION: Defines various macros needed for compile-time selection + of debugging code. 
+@GLOBALS : +@CREATED : +@MODIFIED : +@VERSION : $Id: bt_debug.h,v 1.2 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +#ifndef BT_DEBUG_H +#define BT_DEBUG_H + +/* + * DEBUG is the debug level -- an integer, defaults to 0 + * DBG_ACTION is a macro to conditionally execute a bit of code -- + * must have compiled with DEBUG true, and the debug level + * must be >= `level' (the macro argument) + */ + +#ifndef DEBUG +# define DEBUG 0 +#endif + +#if DEBUG +# define DBG_ACTION(level,action) if (DEBUG >= level) { action; } +#else +# define DBG_ACTION(level,action) +#endif + +#endif /* BT_DEBUG_H */ diff --git a/src/translators/btparse/btconfig.h b/src/translators/btparse/btconfig.h new file mode 100644 index 0000000..7405825 --- /dev/null +++ b/src/translators/btparse/btconfig.h @@ -0,0 +1,220 @@ +#ifndef BTPARSE_CONFIG_H +#define BTPARSE_CONFIG_H +/* + * config.h (for ANTLR, DLG, and SORCERER) + * + * This is a simple configuration file that doesn't have config stuff + * in it, but it's a start. + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. 
By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * Used by PCCTS 1.33 (SORCERER 1.00B11 and up) + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +/* This file knows about the following ``environments'' + UNIX (default) + DOS (use #define PC) + MAC (use #define MPW; has a few things for THINK C, Metrowerks) + */ + +/* +* Define PC32 if in a 32-bit PC environment (e.g. extended DOS or Win32). +* The macros tested here are defined by Watcom, Microsoft, Borland, +* and djgpp, respectively, when they are used as 32-bit compilers. +* Users of these compilers *must* be sure to define PC in their +* makefiles for this to work correctly. 
+*/ +#ifdef PC +# if (defined(__WATCOM__) || defined(_WIN32) || defined(__WIN32__) || \ + defined(__GNUC__) || defined(__GNUG__)) +# ifndef PC32 +# define PC32 +# endif +# endif +#endif + +#ifdef PC +#define ATOKEN_H "AToken.h" +#define ATOKPTR_H "ATokPtr.h" +#define ATOKPTR_C "ATokPtr.cpp" +#define ATOKENBUFFER_H "ATokBuf.h" +#define ATOKENBUFFER_C "ATokBuf.cpp" +#define ATOKENSTREAM_H "ATokStr.h" +#define APARSER_H "AParser.h" +#define APARSER_C "AParser.cpp" +#define ASTBASE_H "ASTBase.h" +#define ASTBASE_C "ASTBase.cpp" +#define PCCTSAST_C "PCCTSAST.cpp" +#define LIST_C "List.cpp" +#define DLEXERBASE_H "DLexBase.h" +#define DLEXERBASE_C "DLexBase.cpp" +#define DLEXER_C "DLexer.cpp" +#define STREESUPPORT_C "STreeSup.C" +#else +#define ATOKEN_H "AToken.h" +#define ATOKPTR_H "ATokPtr.h" +#define ATOKPTR_C "ATokPtr.cpp" +#define ATOKENBUFFER_H "ATokenBuffer.h" +#define ATOKENBUFFER_C "ATokenBuffer.cpp" +#define ATOKENSTREAM_H "ATokenStream.h" +#define APARSER_H "AParser.h" +#define APARSER_C "AParser.cpp" +#define ASTBASE_H "ASTBase.h" +#define ASTBASE_C "ASTBase.cpp" +#define PCCTSAST_C "PCCTSAST.cpp" +#define LIST_C "List.cpp" +#define DLEXERBASE_H "DLexerBase.h" +#define DLEXERBASE_C "DLexerBase.cpp" +#define DLEXER_C "DLexer.cpp" +#define STREESUPPORT_C "STreeSupport.cpp" +#endif + +/* SORCERER Stuff */ +#ifdef PC +#define STPARSER_H "STreePar.h" +#define STPARSER_C "STreePar.C" +#else +#define STPARSER_H "STreeParser.h" +#define STPARSER_C "STreeParser.cpp" +#endif + +#ifdef MPW +#define CPP_FILE_SUFFIX ".cp" +#define CPP_FILE_SUFFIX_NO_DOT "cp" +#define OBJ_FILE_SUFFIX ".o" +#else +#ifdef PC +#define CPP_FILE_SUFFIX ".cpp" +#define CPP_FILE_SUFFIX_NO_DOT "cpp" +#define OBJ_FILE_SUFFIX ".obj" +#else +#define CPP_FILE_SUFFIX ".cpp" +#define CPP_FILE_SUFFIX_NO_DOT "cpp" +#define OBJ_FILE_SUFFIX ".o" +#endif +#endif + +/* User may redefine how line information looks */ +#define LineInfoFormatStr "# %d \"%s\"\n" + +#ifdef MPW /* Macintosh Programmer's Workshop */ 
+#define ErrHdr "File \"%s\"; Line %d #" +#else +#define ErrHdr "%s, line %d:" +#endif + + +/* must assume old K&R cpp here, can't use #if defined(..)... */ + +#ifdef MPW +#define TopDirectory ":" +#define DirectorySymbol ":" +#define OutputDirectoryOption "Directory where all output files should go (default=\":\")" +#else +#ifdef PC +#define TopDirectory "." +#define DirectorySymbol "\\" +#define OutputDirectoryOption "Directory where all output files should go (default=\".\")" +#else +#define TopDirectory "." +#define DirectorySymbol "/" +#define OutputDirectoryOption "Directory where all output files should go (default=\".\")" +#endif +#endif + +#ifdef MPW + +/* Make sure we have prototypes for all functions under MPW */ + +#include <string.h> +#include <stdlib.h> +#include <CursorCtl.h> +#ifdef __cplusplus +extern "C" { +#endif +extern void fsetfileinfo (char *filename, unsigned long newcreator, unsigned long newtype); +#ifdef __cplusplus +} +#endif + +/* File creators for various popular development environments */ + +#define MAC_FILE_CREATOR 'MPS ' /* MPW Text files */ +#if 0 +#define MAC_FILE_CREATOR 'KAHL' /* THINK C/Symantec C++ Text files */ +#endif +#if 0 +#define MAC_FILE_CREATOR 'MMCC' /* Metrowerks C/C++ Text files */ +#endif + +#endif + +#ifdef MPW +#define DAWDLE SpinCursor(1) +#else +#define DAWDLE +#endif + + +/* + * useless definitions of special_inits() and special_fopen_actions() + * deleted -- GPW 1997/09/06 + */ + +/* Define usable bits for set.c stuff */ +#define BytesPerWord sizeof(unsigned) +#define WORDSIZE (sizeof(unsigned)*8) +#define LogWordSize (WORDSIZE==16?4:5) + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#ifdef VAXC +#define PCCTS_EXIT_SUCCESS 1 +#define PCCTS_EXIT_FAILURE 0 +#define zzDIE return 0; +#define zzDONE return 1; + +#else /* !VAXC */ + +#define PCCTS_EXIT_SUCCESS 0 +#define PCCTS_EXIT_FAILURE 1 +#define zzDIE return 1; +#define zzDONE return 0; + +#endif + +#ifdef USER_ZZMODE_STACK 
+# ifndef ZZSTACK_MAX_MODE +# define ZZSTACK_MAX_MODE 32 +# endif +# define ZZMAXSTK (ZZSTACK_MAX_MODE * 2) +#endif + +#endif diff --git a/src/translators/btparse/btparse.h b/src/translators/btparse/btparse.h new file mode 100644 index 0000000..841d3ee --- /dev/null +++ b/src/translators/btparse/btparse.h @@ -0,0 +1,378 @@ +/* ------------------------------------------------------------------------ +@NAME : btparse.h +@DESCRIPTION: Declarations and types for users of the btparse library. + + (Actually, btparse.h is generated from btparse.h.in by + the `configure' script, in order to automatically determine + the appropriate values of HAVE_USHORT and HAVE_BOOLEAN.) +@GLOBALS : +@CALLS : +@CREATED : 1997/01/19, Greg Ward +@MODIFIED : +@VERSION : $Id: btparse.h.in,v 1.35 1999/12/28 18:23:17 greg Exp $ +@COPYRIGHT : Copyright (c) 1996-97 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ +#ifndef BTPARSE_H +#define BTPARSE_H + +#include <sys/types.h> /* probably supplies 'ushort' */ +#include <stdio.h> + +#include "config.h" /* not btparse's config.h but Tellico's */ + +/* + * Here we attempt to define HAVE_USHORT if a typedef for `ushort' appears + * in <sys/types.h>. The detective work is actually done by the + * `configure' script, so if compilation fails because of duplicate + * definitions of `ushort', that's a bug in `configure' -- please tell me + * about it! + */ + +#ifndef HAVE_USHORT +# define HAVE_USHORT 0 +#endif + +#if ! HAVE_USHORT /* needed for various bitmaps */ +typedef unsigned short ushort; +#endif + + +/* Likewise for boolean. */ + +#ifndef HAVE_BOOLEAN +# define HAVE_BOOLEAN 0 +#endif + +#if ! 
HAVE_BOOLEAN +typedef int boolean; +#endif + +#ifndef TRUE +# define TRUE 1 +# define FALSE 0 +#endif + +#ifndef HAVE_STRLWR +# define HAVE_STRLWR 0 +#endif + +#ifndef HAVE_STRUPR +# define HAVE_STRUPR 0 +#endif + + +/* Parsing (and post-processing) options */ + +#define BTO_CONVERT 1 /* convert numbers to strings? */ +#define BTO_EXPAND 2 /* expand macros? */ +#define BTO_PASTE 4 /* paste substrings together? */ +#define BTO_COLLAPSE 8 /* collapse whitespace? */ + +#define BTO_NOSTORE 16 + +#define BTO_FULL (BTO_CONVERT | BTO_EXPAND | BTO_PASTE | BTO_COLLAPSE) +#define BTO_MACRO (BTO_CONVERT | BTO_EXPAND | BTO_PASTE) +#define BTO_MINIMAL 0 + +#define BTO_STRINGMASK (BTO_CONVERT | BTO_EXPAND | BTO_PASTE | BTO_COLLAPSE) + +#define BT_VALID_NAMEPARTS "fvlj" +#define BT_MAX_NAMEPARTS 4 + +typedef enum +{ + BTE_UNKNOWN, + BTE_REGULAR, + BTE_COMMENT, + BTE_PREAMBLE, + BTE_MACRODEF +/* + BTE_ALIAS, + BTE_MODIFY +*/ +} bt_metatype; + +#define NUM_METATYPES ((int) BTE_MACRODEF + 1) + +typedef enum +{ + BTAST_BOGUS, /* to detect uninitialized nodes */ + BTAST_ENTRY, + BTAST_KEY, + BTAST_FIELD, + BTAST_STRING, + BTAST_NUMBER, + BTAST_MACRO +} bt_nodetype; + +typedef enum +{ + BTN_FIRST, BTN_VON, BTN_LAST, BTN_JR, BTN_NONE +} bt_namepart; + +typedef enum +{ + BTJ_MAYTIE, /* "discretionary" tie between words */ + BTJ_SPACE, /* force a space between words */ + BTJ_FORCETIE, /* force a tie (~ in TeX) */ + BTJ_NOTHING /* nothing between words */ +} bt_joinmethod; + + +#define USER_DEFINED_AST 1 + +#define zzcr_ast(ast,attr,tok,txt) \ +{ \ + (ast)->filename = InputFilename; \ + (ast)->line = (attr)->line; \ + (ast)->offset = (attr)->offset; \ + (ast)->text = strdup ((attr)->text); \ +} + +#define zzd_ast(ast) \ +/* printf ("zzd_ast: free'ing ast node with string %p (%s)\n", \ + (ast)->text, (ast)->text); */ \ + if ((ast)->text != NULL) free ((ast)->text); + + +#ifdef USER_DEFINED_AST +typedef struct _ast +{ + struct _ast *right, *down; + char * filename; + int line; + int offset; 
+ bt_nodetype nodetype; + bt_metatype metatype; + char * text; +} AST; +#endif /* USER_DEFINED_AST */ + + +typedef struct +{ + /* + * `string' is the string that has been split; items[0] ... + * items[num_items-1] are pointers into `string', or NULL for empty + * substrings. Note that `string' is actually a copy of the string + * passed in to bt_split_list() with NULs inserted between substrings. + */ + + char * string; + int num_items; + char ** items; +} bt_stringlist; + + +typedef struct +{ + bt_stringlist * tokens; /* flat list of all tokens in name */ + char ** parts[BT_MAX_NAMEPARTS]; /* each elt. is list of pointers */ + /* into `tokens->string' */ + int part_len[BT_MAX_NAMEPARTS]; /* length in tokens */ +} bt_name; + + +typedef struct tex_tree_s +{ + char * start; + int len; + struct tex_tree_s + * child, + * next; +} bt_tex_tree; + + +typedef struct +{ + /* These determine the order (and presence) of parts in the name. */ + int num_parts; + bt_namepart parts[BT_MAX_NAMEPARTS]; + + /* + * These lists are always in the order of the bt_namepart enum -- *not* + * dependent on the particular order of parts the user specified! (This + * will make it a bit harder if I ever allow more than one occurrence of + * a part in a format; since I don't allow that, I'm not [yet] worried + * about it!) 
+ */ + const char * pre_part[BT_MAX_NAMEPARTS]; + char * post_part[BT_MAX_NAMEPARTS]; + char * pre_token[BT_MAX_NAMEPARTS]; + const char * post_token[BT_MAX_NAMEPARTS]; + boolean abbrev[BT_MAX_NAMEPARTS]; + bt_joinmethod join_tokens[BT_MAX_NAMEPARTS]; + bt_joinmethod join_part[BT_MAX_NAMEPARTS]; +} bt_name_format; + + +typedef enum +{ + BTERR_NOTIFY, /* notification about next action */ + BTERR_CONTENT, /* warning about the content of a record */ + BTERR_LEXWARN, /* warning in lexical analysis */ + BTERR_USAGEWARN, /* warning about library usage */ + BTERR_LEXERR, /* error in lexical analysis */ + BTERR_SYNTAX, /* error in parser */ + BTERR_USAGEERR, /* fatal error in library usage */ + BTERR_INTERNAL /* my fault */ +} bt_errclass; + +typedef enum +{ + BTACT_NONE, /* do nothing on error */ + BTACT_CRASH, /* call exit(1) */ + BTACT_ABORT /* call abort() */ +} bt_erraction; + +typedef struct +{ + bt_errclass errclass; + char * filename; + int line; + const char * item_desc; + int item; + char * message; +} bt_error; + +typedef void (*bt_err_handler) (bt_error *); + + +#if defined(__cplusplus__) || defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* Function prototypes */ + +/* + * First, we might need a prototype for strdup() (because the zzcr_ast + * macro uses it, and that macro is used in pccts/ast.c -- which I don't + * want to modify if I can help it, because it's someone else's code). + * This is to accommodate AIX, where including <string.h> apparently doesn't + * declare strdup() (reported by Reiner Schlotte + * <[email protected]>), and compiling bibtex.c (which + * includes pccts/ast.c) crashes because of this (yes, yes, I know it + * should just be a warning -- I don't know what's going on there!). 
+ * + * Unfortunately, this duplicates code in bt_config.h -- I can't include + * bt_config.h here, because this header must be freestanding; I don't want + * to include bt_config.h in pccts/ast.c, because I don't want to touch the + * PCCTS code if I can help it; but I don't want every source file that + * uses strdup() to have to include btparse.h. Hence the duplication. + * Yuck. + */ +#ifndef HAVE_STRDUP_DECL +# define HAVE_STRDUP_DECL 0 +#endif +#if !HAVE_STRDUP_DECL +extern char *strdup (const char *s); +#endif + + +/* init.c */ +void bt_initialize (void); +void bt_free_ast (AST *ast); +void bt_cleanup (void); + +/* input.c */ +void bt_set_stringopts (bt_metatype metatype, ushort options); +AST * bt_parse_entry_s (char * entry_text, + char * filename, + int line, + ushort options, + boolean * status); +AST * bt_parse_entry (FILE * infile, + char * filename, + ushort options, + boolean * status); +AST * bt_parse_file (char * filename, + ushort options, + boolean * overall_status); + +/* postprocess.c */ +void bt_postprocess_string (char * s, ushort options); +char * bt_postprocess_value (AST * value, ushort options, boolean replace); +char * bt_postprocess_field (AST * field, ushort options, boolean replace); +void bt_postprocess_entry (AST * entry, ushort options); + +/* error.c */ +void bt_reset_error_counts (void); +int bt_get_error_count (bt_errclass errclass); +int * bt_get_error_counts (int *counts); +ushort bt_error_status (int *saved_counts); + +/* macros.c */ +void bt_add_macro_value (AST *assignment, ushort options); +void bt_add_macro_text (char * macro, char * text, char * filename, int line); +void bt_delete_macro (char * macro); +void bt_delete_all_macros (void); +int bt_macro_length (char *macro); +char * bt_macro_text (char * macro, char * filename, int line); + +/* traversal.c */ +AST *bt_next_entry (AST *entry_list, AST *prev_entry); +bt_metatype bt_entry_metatype (AST *entry); +char *bt_entry_type (AST *entry); +char *bt_entry_key (AST 
*entry); +AST *bt_next_field (AST *entry, AST *prev, char **name); +AST *bt_next_macro (AST *entry, AST *prev, char **name); +AST *bt_next_value (AST *head, + AST *prev, + bt_nodetype *nodetype, + char **text); +char *bt_get_text (AST *node); + +/* modify.c */ +void bt_set_text (AST * node, char * new_text); +void bt_entry_set_key (AST * entry, char * new_key); + +/* names.c */ +bt_stringlist * bt_split_list (char * string, + char * delim, + char * filename, + int line, + char * description); +void bt_free_list (bt_stringlist *list); +bt_name * bt_split_name (char * name, + char * filename, + int line, + int name_num); +void bt_free_name (bt_name * name); + +/* tex_tree.c */ +bt_tex_tree * bt_build_tex_tree (char * string); +void bt_free_tex_tree (bt_tex_tree **top); +void bt_dump_tex_tree (bt_tex_tree *node, int depth, FILE *stream); +char * bt_flatten_tex_tree (bt_tex_tree *top); + +/* string_util.c */ +void bt_purify_string (char * string, ushort options); +void bt_change_case (char transform, char * string, ushort options); + +/* format_name.c */ +bt_name_format * bt_create_name_format (char * parts, boolean abbrev_first); +void bt_free_name_format (bt_name_format * format); +void bt_set_format_text (bt_name_format * format, + bt_namepart part, + char * pre_part, + char * post_part, + char * pre_token, + char * post_token); +void bt_set_format_options (bt_name_format * format, + bt_namepart part, + boolean abbrev, + bt_joinmethod join_tokens, + bt_joinmethod join_part); +char * bt_format_name (bt_name * name, bt_name_format * format); + +#if defined(__cplusplus__) || defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* BTPARSE_H */ diff --git a/src/translators/btparse/dlgauto.h b/src/translators/btparse/dlgauto.h new file mode 100644 index 0000000..efcc3b2 --- /dev/null +++ b/src/translators/btparse/dlgauto.h @@ -0,0 +1,408 @@ +/* dlgauto.h automaton + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction 
Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Will Cohen and Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ZZDEFAUTO_H +#define ZZDEFAUTO_H + +zzchar_t *zzlextext; /* text of most recently matched token */ +zzchar_t *zzbegexpr; /* beginning of last reg expr recogn. */ +zzchar_t *zzendexpr; /* end of last reg expr recogn. 
*/ +int zzbufsize; /* number of characters in zzlextext */ +int zzbegcol = 0; /* column that first character of token is in*/ +int zzendcol = 0; /* column that last character of token is in */ +int zzline = 1; /* line current token is on */ +int zzreal_line=1; /* line of 1st portion of token that is not skipped */ +int zzchar; /* character to determine next state */ +int zzbufovf; /* indicates that buffer too small for text */ +int zzcharfull = 0; +static zzchar_t *zznextpos;/* points to next available position in zzlextext*/ +static int zzclass; + +void zzerrstd(const char *); +void (*zzerr)(const char *)=zzerrstd;/* pointer to error reporting function */ +extern int zzerr_in(void); + +static FILE *zzstream_in=0; +static int (*zzfunc_in)() = zzerr_in; +static zzchar_t *zzstr_in=0; + +#ifdef USER_ZZMODE_STACK +int zzauto = 0; +#else +static int zzauto = 0; +#endif +static int zzadd_erase; +static char zzebuf[70]; + +#ifdef ZZCOL +#define ZZINC (++zzendcol) +#else +#define ZZINC +#endif + + +#define ZZGETC_STREAM {zzchar = getc(zzstream_in); zzclass = ZZSHIFT(zzchar);} +#define ZZGETC_FUNC {zzchar = (*zzfunc_in)(); zzclass = ZZSHIFT(zzchar);} +#define ZZGETC_STR { \ + if (*zzstr_in){ \ + zzchar = *zzstr_in; \ + ++zzstr_in; \ + }else{ \ + zzchar = EOF; \ + } \ + zzclass = ZZSHIFT(zzchar); \ +} + +#define ZZNEWSTATE (newstate = dfa[state][zzclass]) + +#ifndef ZZCOPY +#define ZZCOPY \ + /* Truncate matching buffer to size (not an error) */ \ + if (zznextpos < lastpos){ \ + *(zznextpos++) = zzchar; \ + }else{ \ + zzbufovf = 1; \ + } +#endif + +void +zzrdstream( FILE *f ) +{ + /* make sure that it is really set to something, otherwise just + leave it be. + */ + if (f){ + /* make sure that there is always someplace to get input + before closing zzstream_in + */ + zzline = 1; + zzstream_in = f; + zzfunc_in = NULL; + zzstr_in = 0; + zzcharfull = 0; + } +} + +void +zzrdfunc( int (*f)() ) +{ + /* make sure that it is really set to something, otherwise just + leave it be. 
+ */ + if (f){ + /* make sure that there is always someplace to get input + before closing zzstream_in + */ + zzline = 1; + zzstream_in = NULL; + zzfunc_in = f; + zzstr_in = 0; + zzcharfull = 0; + } +} + + +void +zzrdstr( zzchar_t *s ) +{ + /* make sure that it is really set to something, otherwise just + leave it be. + */ + if (s){ + /* make sure that there is always someplace to get input + before closing zzstream_in + */ + zzline = 1; + zzstream_in = NULL; + zzfunc_in = 0; + zzstr_in = s; + zzcharfull = 0; + } +} + + +void +zzclose_stream() +{ +} + +/* saves dlg state, but not what feeds dlg (such as file position) */ +void +zzsave_dlg_state(struct zzdlg_state *state) +{ + state->stream = zzstream_in; + state->func_ptr = zzfunc_in; + state->str = zzstr_in; + state->auto_num = zzauto; + state->add_erase = zzadd_erase; + state->lookc = zzchar; + state->char_full = zzcharfull; + state->begcol = zzbegcol; + state->endcol = zzendcol; + state->line = zzline; + state->lextext = zzlextext; + state->begexpr = zzbegexpr; + state->endexpr = zzendexpr; + state->bufsize = zzbufsize; + state->bufovf = zzbufovf; + state->nextpos = zznextpos; + state->class_num = zzclass; +} + +void +zzrestore_dlg_state(struct zzdlg_state *state) +{ + zzstream_in = state->stream; + zzfunc_in = state->func_ptr; + zzstr_in = state->str; + zzauto = state->auto_num; + zzadd_erase = state->add_erase; + zzchar = state->lookc; + zzcharfull = state->char_full; + zzbegcol = state->begcol; + zzendcol = state->endcol; + zzline = state->line; + zzlextext = state->lextext; + zzbegexpr = state->begexpr; + zzendexpr = state->endexpr; + zzbufsize = state->bufsize; + zzbufovf = state->bufovf; + zznextpos = state->nextpos; + zzclass = state->class_num; +} + +void +zzmode( int m ) +{ + /* points to base of dfa table */ + if (m<MAX_MODE){ + zzauto = m; + /* have to redo class since using different compression */ + zzclass = ZZSHIFT(zzchar); + }else{ + sprintf(zzebuf,"Invalid automaton mode = %d ",m); + 
zzerr(zzebuf); + } +} + +/* erase what is currently in the buffer, and get a new reg. expr */ +void +zzskip() +{ + zzadd_erase = 1; +} + +/* don't erase what is in the zzlextext buffer, add on to it */ +void +zzmore() +{ + zzadd_erase = 2; +} + +/* substitute c for the reg. expr last matched and is in the buffer */ +void +zzreplchar(zzchar_t c) +{ + /* can't allow overwriting null at end of string */ + if (zzbegexpr < &zzlextext[zzbufsize-1]){ + *zzbegexpr = c; + *(zzbegexpr+1) = '\0'; + } + zzendexpr = zzbegexpr; + zznextpos = zzbegexpr + 1; +} + +/* replace the string s for the reg. expr last matched and in the buffer; + * copies s (truncated to fit zzlextext) over the text starting at zzbegexpr, + * sets zzbufovf when s did not fit, and NUL-terminates the buffer. + * A NULL s leaves an empty replacement. */ +void +zzreplstr(register zzchar_t *s) +{ + register zzchar_t *l= &zzlextext[zzbufsize -1]; + + zznextpos = zzbegexpr; + if (s){ + while ((zznextpos <= l) && (*(zznextpos++) = *(s++))!=0){ + /* empty */ + } + /* correct for NULL at end of string */ + zznextpos--; + } + /* s may be NULL (nothing was copied): do not step back through it */ + if ((zznextpos <= l) && (!s || *(--s) == 0)){ + zzbufovf = 0; + }else{ + zzbufovf = 1; + } + *(zznextpos) = '\0'; + zzendexpr = zznextpos - 1; +} + +void +zzgettok() +{ + register int state, newstate; + /* last space reserved for the null char */ + zzchar_t *lastpos; /* GPW 1997/09/05 (removed 'register' */ + +skip: + zzreal_line = zzline; + zzbufovf = 0; + lastpos = &zzlextext[zzbufsize-1]; + zznextpos = zzlextext; + zzbegcol = zzendcol+1; +more: + zzbegexpr = zznextpos; +#ifdef ZZINTERACTIVE + /* interactive version of automaton */ + /* if there is something in zzchar, process it */ + state = newstate = dfa_base[zzauto]; + if (zzcharfull){ + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + if (zzstr_in) + while (zzalternatives[newstate]){ + state = newstate; + ZZGETC_STR; + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + else if (zzstream_in) + while (zzalternatives[newstate]){ + state = newstate; + ZZGETC_STREAM; + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + else if (zzfunc_in) + while (zzalternatives[newstate]){ + state = newstate; + ZZGETC_FUNC; + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + /* figure out if last 
character really part of token */ + if ((state != dfa_base[zzauto]) && (newstate == DfaStates)){ + zzcharfull = 1; + --zznextpos; + }else{ + zzcharfull = 0; + state = newstate; + } + *(zznextpos) = '\0'; + /* Able to transition out of start state to some non err state?*/ + if ( state == dfa_base[zzauto] ){ + /* make sure doesn't get stuck */ + zzadvance(); + } +#else + /* non-interactive version of automaton */ + if (!zzcharfull) + zzadvance(); + else + ZZINC; + state = dfa_base[zzauto]; + if (zzstr_in) + while (ZZNEWSTATE != DfaStates){ + state = newstate; + ZZCOPY; + ZZGETC_STR; + ZZINC; + } + else if (zzstream_in) + while (ZZNEWSTATE != DfaStates){ + state = newstate; + ZZCOPY; + ZZGETC_STREAM; + ZZINC; + } + else if (zzfunc_in) + while (ZZNEWSTATE != DfaStates){ + state = newstate; + ZZCOPY; + ZZGETC_FUNC; + ZZINC; + } + zzcharfull = 1; + if ( state == dfa_base[zzauto] ){ + if (zznextpos < lastpos){ + *(zznextpos++) = zzchar; + }else{ + zzbufovf = 1; + } + *zznextpos = '\0'; + /* make sure doesn't get stuck */ + zzadvance(); + }else{ + *zznextpos = '\0'; + } +#endif +#ifdef ZZCOL + zzendcol -= zzcharfull; +#endif + zzendexpr = zznextpos -1; + zzadd_erase = 0; + (*actions[accepts[state]])(); + switch (zzadd_erase) { + case 1: goto skip; + case 2: goto more; + } +} + +void +zzadvance() +{ + if (zzstream_in) { ZZGETC_STREAM; zzcharfull = 1; ZZINC;} + if (zzfunc_in) { ZZGETC_FUNC; zzcharfull = 1; ZZINC;} + if (zzstr_in) { ZZGETC_STR; zzcharfull = 1; ZZINC;} + if (!(zzstream_in || zzfunc_in || zzstr_in)){ + zzerr_in(); + } +} + +void +zzerrstd(const char *s) +{ + fprintf(stderr, + "%s near line %d (text was '%s')\n", + ((s == NULL) ? 
"Lexical error" : s), + zzline,zzlextext); +} + +int +zzerr_in() +{ + fprintf(stderr,"No input stream, function, or string\n"); + /* return eof to get out gracefully */ + return EOF; +} + +#endif diff --git a/src/translators/btparse/dlgdef.h b/src/translators/btparse/dlgdef.h new file mode 100644 index 0000000..ded2c31 --- /dev/null +++ b/src/translators/btparse/dlgdef.h @@ -0,0 +1,97 @@ +/* dlgdef.h + * Things in scanner produced by dlg that should be visible to the outside + * world + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ZZDLGDEF_H +#define ZZDLGDEF_H + +#include "btconfig.h" + +#ifndef zzchar_t +#ifdef ZZWCHAR_T +#define zzchar_t unsigned wchar_t +#else +#define zzchar_t unsigned char +#endif +#endif + +struct zzdlg_state { + FILE *stream; + int (*func_ptr)(); + zzchar_t *str; + int auto_num; + int add_erase; + int lookc; + int char_full; + int begcol, endcol; + int line; + zzchar_t *lextext, *begexpr, *endexpr; + int bufsize; + int bufovf; + zzchar_t *nextpos; + int class_num; +}; + +extern zzchar_t *zzlextext; /* text of most recently matched token */ +extern zzchar_t *zzbegexpr; /* beginning of last reg expr recogn. */ +extern zzchar_t *zzendexpr; /* end (last character) of last reg expr recogn. */ +extern int zzbufsize; /* how long zzlextext is */ +extern int zzbegcol; /* column that first character of token is in*/ +extern int zzendcol; /* column that last character of token is in */ +extern int zzline; /* line current token is on */ +extern int zzreal_line; /* line of 1st portion of token that is not skipped */ +extern int zzchar; /* character to determine next state */ +extern int zzbufovf; /* indicates that buffer too small for text */ +extern void (*zzerr)(const char *);/* pointer to error reporting function */ + +#ifdef USER_ZZMODE_STACK +extern int zzauto; +#endif + +extern void zzadvance(void); +extern void zzskip(void); /* erase zzlextext, look for another token */ +extern void zzmore(void); /* keep zzlextext, look for another token */ +extern void zzmode(int k); /* switch to automaton 'k' */ +extern void zzrdstream(FILE *);/* what stream to read from */ +extern void zzclose_stream(void);/* close the current input stream */ +extern void zzrdfunc(int (*)());/* what function to get char from */ +extern void zzrdstr( zzchar_t * ); +extern void zzgettok(void); /* get next token */ +extern void zzreplchar(zzchar_t c);/* replace 
last recognized reg. expr. with + a character */ +extern void zzreplstr(zzchar_t *s);/* replace last recognized reg. expr. with + a string */ +extern void zzsave_dlg_state(struct zzdlg_state *); +extern void zzrestore_dlg_state(struct zzdlg_state *); +extern int zzerr_in(void); +extern void zzerrstd(const char *); +extern void zzerraction(); + +#endif diff --git a/src/translators/btparse/err.c b/src/translators/btparse/err.c new file mode 100644 index 0000000..f143048 --- /dev/null +++ b/src/translators/btparse/err.c @@ -0,0 +1,75 @@ +/* + * A n t l r S e t s / E r r o r F i l e H e a d e r + * + * Generated from: bibtex.g + * + * Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995 + * Parr Research Corporation + * with Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ + +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btconfig.h" +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#define zzSET_SIZE 4 +#include "antlr.h" +#include "ast.h" +#include "tokens.h" +#include "dlgdef.h" +#include "err.h" + +const ANTLRChar *zztokens[27]={ + /* 00 */ "Invalid", + /* 01 */ "@", + /* 02 */ "AT", + /* 03 */ "\\n", + /* 04 */ "COMMENT", + /* 05 */ "[\\ \\r\\t]+", + /* 06 */ "~[\\@\\n\\ \\r\\t]+", + /* 07 */ "\\n", + /* 08 */ "[\\ \\r\\t]+", + /* 09 */ "NUMBER", + /* 10 */ "NAME", + /* 11 */ "LBRACE", + /* 12 */ "RBRACE", + /* 13 */ "ENTRY_OPEN", + /* 14 */ "ENTRY_CLOSE", + /* 15 */ "EQUALS", + /* 16 */ "HASH", + /* 17 */ "COMMA", + /* 18 */ "\"", + /* 19 */ "\\n~[\\n\\{\\}\\(\\)\"\\]*", + /* 20 */ "[\\r\\t]", + /* 21 */ "\\{", + /* 22 */ "\\}", + /* 23 */ "\\(", + /* 24 */ "\\)", + /* 25 */ "STRING", + /* 26 */ "~[\\n\\{\\}\\(\\)\"]+" +}; +SetWordType zzerr1[4] = {0x0,0x20,0x0,0x2}; +SetWordType zzerr2[4] = 
{0x0,0x6,0x0,0x0}; +SetWordType zzerr3[4] = {0x0,0x46,0x0,0x2}; +SetWordType zzerr4[4] = {0x0,0x44,0x0,0x0}; +SetWordType setwd1[27] = {0x0,0x7,0x6,0x0,0x0,0x0,0x0, + 0x0,0x0,0x28,0x38,0x0,0x0,0x0,0xd0, + 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, + 0x0,0x0,0x20,0x0}; +SetWordType zzerr5[4] = {0x0,0x6,0x0,0x2}; +SetWordType setwd2[27] = {0x0,0x0,0x0,0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7, + 0x0,0x4,0x7,0x0,0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0}; diff --git a/src/translators/btparse/err.h b/src/translators/btparse/err.h new file mode 100644 index 0000000..d16615d --- /dev/null +++ b/src/translators/btparse/err.h @@ -0,0 +1,700 @@ +/* + * err.h + * + * Standard error handling mechanism + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * Has grown to hold all kinds of stuff (err.h is increasingly misnamed) + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ERR_H +#define ERR_H + +#include "btconfig.h" + +#include <string.h> +#include <stdarg.h> + +#ifdef DUM +/* Define usable bits per unsigned int word (used for set stuff) */ +#ifdef PC +#define BSETWORDSIZE 16 +#define BSETLOGWORDSIZE 4 +#else +#define BSETWORDSIZE 32 +#define BSETLOGWORDSIZE 5 +#endif +#endif + +#define BSETWORDSIZE 8 +#define BSETLOGWORDSIZE 3 /* SetWordType is 8bits */ + +#define BSETMODWORD(x) ((x) & (BSETWORDSIZE-1)) /* x % BSETWORDSIZE */ +#define BSETDIVWORD(x) ((x) >> BSETLOGWORDSIZE) /* x / BSETWORDSIZE */ + +/* This is not put into the global pccts_parser structure because it is + * hidden and does not need to be saved during a "save state" operation + */ +/* maximum of 32 bits/unsigned int and must be 8 bits/byte */ +static SetWordType bitmask[] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080 +}; + +void +zzresynch(SetWordType *wd,SetWordType mask) +{ + static int consumed = 1; + + /* if you enter here without having consumed a token from last resynch + * force a token consumption. 
+ */ + if ( !consumed ) {zzCONSUME; return;} + + /* if current token is in resynch set, we've got what we wanted */ + if ( wd[LA(1)]&mask || LA(1) == zzEOF_TOKEN ) {consumed=0; return;} + + /* scan until we find something in the resynch set */ + while ( !(wd[LA(1)]&mask) && LA(1) != zzEOF_TOKEN ) {zzCONSUME;} + consumed=1; +} + +void +zzconsumeUntil(SetWordType *st) +{ + while ( !zzset_el(LA(1), st) ) { zzCONSUME; } +} + +void +zzconsumeUntilToken(int t) +{ + while ( LA(1)!=t ) { zzCONSUME; } +} + +/* input looks like: + * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText) + * where the zzMiss stuff is set here to the token that did not match + * (and which set wasn't it a member of). + */ +void +zzFAIL(int k, ...) +{ +#ifdef LL_K + static char text[LL_K*ZZLEXBUFSIZE+1]; + SetWordType *f[LL_K]; +#else + static char text[ZZLEXBUFSIZE+1]; + SetWordType *f[1]; +#endif + SetWordType **miss_set; + char **miss_text; + int *bad_tok; + char **bad_text; + int *err_k; + int i; + va_list ap; +/* Removed because it shadows a parameter. gcc 3.4 complains. + I think removing it preserves the behavior of gcc 3.3 and previous. 
+ int k; +*/ + va_start(ap, k); + text[0] = '\0'; + for (i=1; i<=k; i++) /* collect all lookahead sets */ + { + f[i-1] = va_arg(ap, SetWordType *); + } + for (i=1; i<=k; i++) /* look for offending token */ + { + if ( i>1 ) strcat(text, " "); + strcat(text, LATEXT(i)); + if ( !zzset_el((unsigned)LA(i), f[i-1]) ) break; + } + miss_set = va_arg(ap, SetWordType **); + miss_text = va_arg(ap, char **); + bad_tok = va_arg(ap, int *); + bad_text = va_arg(ap, char **); + err_k = va_arg(ap, int *); + if ( i>k ) + { + /* bad; lookahead is permutation that cannot be matched, + * but, the ith token of lookahead is valid at the ith position + * (The old LL sub 1 (k) versus LL(k) parsing technique) + */ + *miss_set = NULL; + *miss_text = zzlextext; + *bad_tok = LA(1); + *bad_text = LATEXT(1); + *err_k = k; + return; + } +/* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/ + *miss_set = f[i-1]; + *miss_text = text; + *bad_tok = LA(i); + *bad_text = LATEXT(i); + if ( i==1 ) *err_k = 1; + else *err_k = k; +} + +void +zzsave_antlr_state(zzantlr_state *buf) +{ +#ifdef LL_K + int i; +#endif + +#ifdef ZZCAN_GUESS + buf->guess_start = zzguess_start; + buf->guessing = zzguessing; +#endif + buf->asp = zzasp; +#ifdef GENAST + buf->ast_sp = zzast_sp; +#endif +#ifdef ZZINF_LOOK + buf->inf_labase = zzinf_labase; + buf->inf_last = zzinf_last; +#endif +#ifdef DEMAND_LOOK + buf->dirty = zzdirty; +#endif +#ifdef LL_K + for (i=0; i<LL_K; i++) buf->tokenLA[i] = zztokenLA[i]; + for (i=0; i<LL_K; i++) strcpy(buf->textLA[i], zztextLA[i]); + buf->lap = zzlap; + buf->labase = zzlabase; +#else + buf->token = zztoken; + strcpy(buf->text, zzlextext); +#endif +} + +void +zzrestore_antlr_state(zzantlr_state *buf) +{ +#ifdef LL_K + int i; +#endif + +#ifdef ZZCAN_GUESS + zzguess_start = buf->guess_start; + zzguessing = buf->guessing; +#endif + zzasp = buf->asp; +#ifdef GENAST + zzast_sp = buf->ast_sp; +#endif +#ifdef ZZINF_LOOK + zzinf_labase = buf->inf_labase; + zzinf_last = buf->inf_last; 
+#endif +#ifdef DEMAND_LOOK + zzdirty = buf->dirty; +#endif +#ifdef LL_K + for (i=0; i<LL_K; i++) zztokenLA[i] = buf->tokenLA[i]; + for (i=0; i<LL_K; i++) strcpy(zztextLA[i], buf->textLA[i]); + zzlap = buf->lap; + zzlabase = buf->labase; +#else + zztoken = buf->token; + strcpy(zzlextext, buf->text); +#endif +} + +void +zzedecode(SetWordType *a) +{ + register SetWordType *p = a; + register SetWordType *endp = &(p[zzSET_SIZE]); + register unsigned e = 0; + + if ( zzset_deg(a)>1 ) fprintf(stderr, " {"); + do { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if ( t & *b ) fprintf(stderr, " %s", zztokens[e]); + e++; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + } while (++p < endp); + if ( zzset_deg(a)>1 ) fprintf(stderr, " }"); +} + +#ifndef USER_ZZSYN +/* standard error reporting function */ +void +zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text) +{ + + fprintf(stderr, "line %d: syntax error at \"%s\"", zzline, (tok==zzEOF_TOKEN)?"EOF":bad_text); + if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} + if ( k==1 ) fprintf(stderr, " missing"); + else + { + fprintf(stderr, "; \"%s\" not", bad_text); + if ( zzset_deg(eset)>1 ) fprintf(stderr, " in"); + } + if ( zzset_deg(eset)>0 ) zzedecode(eset); + else fprintf(stderr, " %s", zztokens[etok]); + if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup); + fprintf(stderr, "\n"); +} +#endif + +/* is b an element of set p? */ +int +zzset_el(unsigned b, SetWordType *p) +{ + return( p[BSETDIVWORD(b)] & bitmask[BSETMODWORD(b)] ); +} + +int +zzset_deg(SetWordType *a) +{ + /* Fast compute degree of a set... the number + of elements present in the set. 
Assumes + that all word bits are used in the set + */ + register SetWordType *p = a; + register SetWordType *endp = &(a[zzSET_SIZE]); + register int degree = 0; + + if ( a == NULL ) return 0; + while ( p < endp ) + { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if (t & *b) ++degree; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + p++; + } + + return(degree); +} + +#ifdef DEMAND_LOOK + +#ifdef LL_K +int +_zzmatch(int _t, char **zzBadText, char **zzMissText, + int *zzMissTok, int *zzBadTok, + SetWordType **zzMissSet) +{ + if ( zzdirty==LL_K ) { + zzCONSUME; + } + if ( LA(1)!=_t ) { + *zzBadText = *zzMissText=LATEXT(1); + *zzMissTok= _t; *zzBadTok=LA(1); + *zzMissSet=NULL; + return 0; + } + zzMakeAttr + zzdirty++; + zzlabase++; + return 1; +} + +int +_zzmatch_wsig(int _t) +{ + if ( zzdirty==LL_K ) { + zzCONSUME; + } + if ( LA(1)!=_t ) { + return 0; + } + zzMakeAttr + zzdirty++; + zzlabase++; + return 1; +} + +#else + +int +_zzmatch(int _t, char **zzBadText, char **zzMissText, + int *zzMissTok, int *zzBadTok, SetWordType **zzMissSet) +{ + if ( zzdirty ) {zzCONSUME;} + if ( LA(1)!=_t ) { + *zzBadText = *zzMissText=LATEXT(1); + *zzMissTok= _t; *zzBadTok=LA(1); + *zzMissSet=NULL; + return 0; + } + zzdirty = 1; + zzMakeAttr + return 1; +} + +int +_zzmatch_wsig(int _t) +{ + if ( zzdirty ) {zzCONSUME;} + if ( LA(1)!=_t ) { + return 0; + } + zzdirty = 1; + zzMakeAttr + return 1; +} + +#endif /*LL_K*/ + +#else + +int +_zzmatch(int _t, const char **zzBadText, const char **zzMissText, + int *zzMissTok, int *zzBadTok, + SetWordType **zzMissSet) +{ + if ( LA(1)!=_t ) { + *zzBadText = *zzMissText=LATEXT(1); + *zzMissTok= _t; *zzBadTok=LA(1); + *zzMissSet=NULL; + return 0; + } + zzMakeAttr + return 1; +} + +int +_zzmatch_wsig(int _t) +{ + if ( LA(1)!=_t ) return 0; + zzMakeAttr + return 1; +} + +#endif /*DEMAND_LOOK*/ + +#ifdef ZZINF_LOOK +void +_inf_zzgettok(void) +{ + if ( zzinf_labase >= zzinf_last ) + {NLA = zzEOF_TOKEN; 
strcpy(NLATEXT, "");} + else { + NLA = zzinf_tokens[zzinf_labase]; + zzline = zzinf_line[zzinf_labase]; /* wrong in 1.21 */ + strcpy(NLATEXT, zzinf_text[zzinf_labase]); + zzinf_labase++; + } +} +#endif + +#ifdef ZZINF_LOOK +/* allocate default size text,token and line arrays; + * then, read all of the input reallocing the arrays as needed. + * Once the number of total tokens is known, the LATEXT(i) array (zzinf_text) + * is allocated and it's pointers are set to the tokens in zzinf_text_buffer. + */ +void +zzfill_inf_look(void) +{ + int tok, line; + int zzinf_token_buffer_size = ZZINF_DEF_TOKEN_BUFFER_SIZE; + int zzinf_text_buffer_size = ZZINF_DEF_TEXT_BUFFER_SIZE; + int zzinf_text_buffer_index = 0; + int zzinf_lap = 0; + + /* allocate text/token buffers */ + zzinf_text_buffer = (char *) malloc(zzinf_text_buffer_size); + if ( zzinf_text_buffer == NULL ) + { + fprintf(stderr, "cannot allocate lookahead text buffer (%d bytes)\n", + zzinf_text_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_tokens = (int *) calloc(zzinf_token_buffer_size,sizeof(int)); + if ( zzinf_tokens == NULL ) + { + fprintf(stderr, "cannot allocate token buffer (%d tokens)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_line = (int *) calloc(zzinf_token_buffer_size,sizeof(int)); + if ( zzinf_line == NULL ) + { + fprintf(stderr, "cannot allocate line buffer (%d ints)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + + /* get tokens, copying text to text buffer */ + zzinf_text_buffer_index = 0; + do { + zzgettok(); + line = zzreal_line; + while ( zzinf_lap>=zzinf_token_buffer_size ) + { + zzinf_token_buffer_size += ZZINF_BUFFER_TOKEN_CHUNK_SIZE; + zzinf_tokens = (int *) realloc(zzinf_tokens, + zzinf_token_buffer_size*sizeof(int)); + if ( zzinf_tokens == NULL ) + { + fprintf(stderr, "cannot allocate lookahead token buffer (%d tokens)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_line = (int *) realloc(zzinf_line, + 
zzinf_token_buffer_size*sizeof(int)); + if ( zzinf_line == NULL ) + { + fprintf(stderr, "cannot allocate lookahead line buffer (%d ints)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + + } + while ( (zzinf_text_buffer_index+strlen(NLATEXT)+1) >= zzinf_text_buffer_size ) + { + zzinf_text_buffer_size += ZZINF_BUFFER_TEXT_CHUNK_SIZE; + zzinf_text_buffer = (char *) realloc(zzinf_text_buffer, + zzinf_text_buffer_size); + if ( zzinf_text_buffer == NULL ) + { + fprintf(stderr, "cannot allocate lookahead text buffer (%d bytes)\n", + zzinf_text_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + } + /* record token and text and line of input symbol */ + tok = zzinf_tokens[zzinf_lap] = NLA; + strcpy(&zzinf_text_buffer[zzinf_text_buffer_index], NLATEXT); + zzinf_text_buffer_index += strlen(NLATEXT)+1; + zzinf_line[zzinf_lap] = line; + zzinf_lap++; + } while (tok!=zzEOF_TOKEN); + zzinf_labase = 0; + zzinf_last = zzinf_lap-1; + + /* allocate ptrs to text of ith token */ + zzinf_text = (char **) calloc(zzinf_last+1,sizeof(char *)); + if ( zzinf_text == NULL ) + { + fprintf(stderr, "cannot allocate lookahead text buffer (%d)\n", + zzinf_text_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_text_buffer_index = 0; + zzinf_lap = 0; + /* set ptrs so that zzinf_text[i] is the text of the ith token found on input */ + while (zzinf_lap<=zzinf_last) + { + zzinf_text[zzinf_lap++] = &zzinf_text_buffer[zzinf_text_buffer_index]; + zzinf_text_buffer_index += strlen(&zzinf_text_buffer[zzinf_text_buffer_index])+1; + } +} +#endif + +int +_zzsetmatch(SetWordType *e, char **zzBadText, char **zzMissText, + int *zzMissTok, int *zzBadTok, + SetWordType **zzMissSet) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) {zzCONSUME;} +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + if ( !zzset_el((unsigned)LA(1), e) ) { + *zzBadText = LATEXT(1); *zzMissText=NULL; + *zzMissTok= 0; *zzBadTok=LA(1); + *zzMissSet=e; + return 0; + } +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; 
+#else + zzdirty = 1; +#endif +#endif + zzMakeAttr + return 1; +} + +int +_zzmatch_wdfltsig(int tokenWanted, SetWordType *whatFollows) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) { + zzCONSUME; + } +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + + if ( LA(1)!=tokenWanted ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + zzline, + (LA(1)==zzEOF_TOKEN)?"<eof>":(char*)LATEXT(1), + zztokens[tokenWanted]); + zzconsumeUntil( whatFollows ); + return 0; + } + else { + zzMakeAttr +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; + zzlabase++; +#else + zzdirty = 1; +#endif +#else +/* zzCONSUME; consume if not demand lookahead */ +#endif + return 1; + } +} + +int +_zzsetmatch_wdfltsig(SetWordType *tokensWanted, + int tokenTypeOfSet, + SetWordType *whatFollows) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) {zzCONSUME;} +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + if ( !zzset_el((unsigned)LA(1), tokensWanted) ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + zzline, + (LA(1)==zzEOF_TOKEN)?"<eof>":(char*)LATEXT(1), + zztokens[tokenTypeOfSet]); + zzconsumeUntil( whatFollows ); + return 0; + } + else { + zzMakeAttr +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; + zzlabase++; +#else + zzdirty = 1; +#endif +#else +/* zzCONSUME; consume if not demand lookahead */ +#endif + return 1; + } +} + +int +_zzsetmatch_wsig(SetWordType *e) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) {zzCONSUME;} +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + if ( !zzset_el((unsigned)LA(1), e) ) return 0; +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; +#else + zzdirty = 1; +#endif +#endif + zzMakeAttr + return 1; +} + +#ifdef USER_ZZMODE_STACK +static int zzmstk[ZZMAXSTK] = { -1 }; +static int zzmdep = 0; +static char zzmbuf[70]; + +void +zzmpush( int m ) +{ + if(zzmdep == ZZMAXSTK - 1) { + sprintf(zzmbuf, "Mode stack overflow "); + zzerr(zzmbuf); + } else { + zzmstk[zzmdep++] = zzauto; + zzmode(m); 
+ } +} + +void +zzmpop( void ) +{ + if(zzmdep == 0) + { sprintf(zzmbuf, "Mode stack underflow "); + zzerr(zzmbuf); + } + else + { zzmdep--; + zzmode(zzmstk[zzmdep]); + } +} + +void +zzsave_mode_stack( int modeStack[], int *modeLevel ) +{ + int i; + memcpy(modeStack, zzmstk, sizeof(zzmstk)); + *modeLevel = zzmdep; + zzmdep = 0; + + return; +} + +void +zzrestore_mode_stack( int modeStack[], int *modeLevel ) +{ + int i; + + memcpy(zzmstk, modeStack, sizeof(zzmstk)); + zzmdep = *modeLevel; + + return; +} +#endif /* USER_ZZMODE_STACK */ + +#endif /* ERR_H */ diff --git a/src/translators/btparse/error.c b/src/translators/btparse/error.c new file mode 100644 index 0000000..26f2fb2 --- /dev/null +++ b/src/translators/btparse/error.c @@ -0,0 +1,348 @@ +/* ------------------------------------------------------------------------ +@NAME : error.c +@DESCRIPTION: Anything relating to reporting or recording errors and + warnings. +@GLOBALS : errclass_names + err_actions + err_handlers + errclass_counts + error_buf +@CALLS : +@CREATED : 1996/08/28, Greg Ward +@MODIFIED : +@VERSION : $Id: error.c,v 2.5 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include "btparse.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + + +#define NUM_ERRCLASSES ((int) BTERR_INTERNAL + 1) + + +static const char *errclass_names[NUM_ERRCLASSES] = +{ + NULL, /* BTERR_NOTIFY */ + "warning", /* BTERR_CONTENT */ + "warning", /* BTERR_LEXWARN */ + "warning", /* BTERR_USAGEWARN */ + "error", /* BTERR_LEXERR */ + "syntax error", /* BTERR_SYNTAX */ + "fatal error", /* BTERR_USAGEERR */ + "internal error" /* BTERR_INTERNAL */ +}; + +static const bt_erraction err_actions[NUM_ERRCLASSES] = +{ + BTACT_NONE, /* BTERR_NOTIFY */ + BTACT_NONE, /* BTERR_CONTENT */ + BTACT_NONE, /* BTERR_LEXWARN */ + BTACT_NONE, /* BTERR_USAGEWARN */ + BTACT_NONE, /* BTERR_LEXERR */ + BTACT_NONE, /* BTERR_SYNTAX */ + BTACT_CRASH, /* BTERR_USAGEERR */ + BTACT_ABORT /* BTERR_INTERNAL */ +}; + +void print_error (bt_error *err); + +static bt_err_handler err_handlers[NUM_ERRCLASSES] = +{ + print_error, + print_error, + print_error, + print_error, + print_error, + print_error, + print_error, + print_error +}; + +static int errclass_counts[NUM_ERRCLASSES] = { 0, 0, 0, 0, 0, 0, 0, 0 }; +static char error_buf[MAX_ERROR+1]; + + +/* ---------------------------------------------------------------------- + * Error-handling functions. + */ + +void print_error (bt_error *err) +{ + const char * name; + boolean something_printed; + + something_printed = FALSE; + + if (err->filename) + { + fprintf (stderr, err->filename); + something_printed = TRUE; + } + if (err->line > 0) /* going to print a line number? */ + { + if (something_printed) + fprintf (stderr, ", "); + fprintf (stderr, "line %d", err->line); + something_printed = TRUE; + } + if (err->item_desc && err->item > 0) /* going to print an item number? 
*/ + { + if (something_printed) + fprintf (stderr, ", "); + fprintf (stderr, "%s %d", err->item_desc, err->item); + something_printed = TRUE; + } + + name = errclass_names[(int) err->errclass]; + if (name) + { + if (something_printed) + fprintf (stderr, ", "); + fprintf (stderr, name); + something_printed = TRUE; + } + + if (something_printed) + fprintf (stderr, ": "); + + fprintf (stderr, "%s\n", err->message); + +} /* print_error() */ + + + +/* ---------------------------------------------------------------------- + * Error-reporting functions: these are called anywhere in the library + * when we encounter an error. + */ + +void +report_error (bt_errclass errclass, + char * filename, + int line, + const char * item_desc, + int item, + const char * fmt, + va_list arglist) +{ + bt_error err; +#if !HAVE_VSNPRINTF + int msg_len; +#endif + + err.errclass = errclass; + err.filename = filename; + err.line = line; + err.item_desc = item_desc; + err.item = item; + + errclass_counts[(int) errclass]++; + + + /* + * Blech -- we're writing to a static buffer because there's no easy + * way to know how long the error message is going to be. (Short of + * reimplementing printf(), or maybe printf()'ing to a dummy file + * and using the return value -- ugh!) The GNU C library conveniently + * supplies vsnprintf(), which neatly solves this problem by truncating + * the output string if it gets too long. (I could check for this + * truncation if I wanted to, but I don't think it's necessary given the + * ample size of the message buffer.) For non-GNU systems, though, + * we're stuck with using vsprintf()'s return value. This can't be + * trusted on all systems -- thus there's a check for it in configure. + * Also, this won't necessarily trigger the internal_error() if we + * do overflow; it's conceivable that vsprintf() itself would crash. 
+ * At least doing it this way we avoid the possibility of vsprintf() + * silently corrupting some memory, and crashing unpredictably at some + * later point. + */ + +#if HAVE_VSNPRINTF + vsnprintf (error_buf, MAX_ERROR, fmt, arglist); +#else + msg_len = vsprintf (error_buf, fmt, arglist); + if (msg_len > MAX_ERROR) + internal_error ("static error message buffer overflowed"); +#endif + + err.message = error_buf; + if (err_handlers[errclass]) + (*err_handlers[errclass]) (&err); + + switch (err_actions[errclass]) + { + case BTACT_NONE: return; + case BTACT_CRASH: exit (1); + case BTACT_ABORT: abort (); + default: internal_error ("invalid error action %d for class %d (%s)", + (int) err_actions[errclass], + (int) errclass, errclass_names[errclass]); + } + +} /* report_error() */ + + +GEN_ERRFUNC (general_error, + (bt_errclass errclass, + char * filename, + int line, + const char * item_desc, + int item, + char * fmt, + ...), + errclass, filename, line, item_desc, item, fmt) + +GEN_ERRFUNC (error, + (bt_errclass errclass, + char * filename, + int line, + char * fmt, + ...), + errclass, filename, line, NULL, -1, fmt) + +GEN_ERRFUNC (ast_error, + (bt_errclass errclass, + AST * ast, + char * fmt, + ...), + errclass, ast->filename, ast->line, NULL, -1, fmt) + +GEN_ERRFUNC (notify, + (const char * fmt, ...), + BTERR_NOTIFY, NULL, -1, NULL, -1, fmt) + +GEN_ERRFUNC (usage_warning, + (const char * fmt, ...), + BTERR_USAGEWARN, NULL, -1, NULL, -1, fmt) + +GEN_ERRFUNC (usage_error, + (const char * fmt, ...), + BTERR_USAGEERR, NULL, -1, NULL, -1, fmt) + +GEN_ERRFUNC (internal_error, + (const char * fmt, ...), + BTERR_INTERNAL, NULL, -1, NULL, -1, fmt) + + +/* ====================================================================== + * Functions to be used outside of the library + */ + +/* ------------------------------------------------------------------------ +@NAME : bt_reset_error_counts() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Resets all the error counters to zero. 
+@GLOBALS : +@CALLS : +@CREATED : 1997/01/08, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_reset_error_counts (void) +{ + int i; + + for (i = 0; i < NUM_ERRCLASSES; i++) + errclass_counts[i] = 0; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_get_error_count() +@INPUT : errclass +@OUTPUT : +@RETURNS : +@DESCRIPTION: Returns number of errors seen in the specified class. +@GLOBALS : errclass_counts +@CALLS : +@CREATED : +@MODIFIED : +-------------------------------------------------------------------------- */ +int bt_get_error_count (bt_errclass errclass) +{ + return errclass_counts[errclass]; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_get_error_counts() +@INPUT : counts - pointer to an array big enough to hold all the counts + if NULL, the array will be allocated for you (and you + must free() it when done with it) +@OUTPUT : +@RETURNS : counts - either the passed-in pointer, or the newly- + allocated array if you pass in NULL +@DESCRIPTION: Returns a newly-allocated array with the number of errors + in each error class, indexed by the members of the + eclass_t enum. 
+@GLOBALS : errclass_counts +@CALLS : +@CREATED : 1997/01/06, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +int *bt_get_error_counts (int *counts) +{ + int i; + + if (counts == NULL) + counts = (int *) malloc (sizeof (int) * NUM_ERRCLASSES); + for (i = 0; i < NUM_ERRCLASSES; i++) + counts[i] = errclass_counts[i]; + + return counts; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_error_status +@INPUT : saved_counts - an array of error counts as returned by + bt_get_error_counts, or NULL not to compare + to a previous checkpoint +@OUTPUT : +@RETURNS : +@DESCRIPTION: Computes a bitmap where a bit is set for each error class + that has more errors now than it used to have (or, if + saved_counts is NULL, the bit is set of there are have been + any errors in the corresponding error class). + + Eg. "x & (1<<E_SYNTAX)" (where x is returned by bt_error_status) + is true if there have been any syntax errors. +@GLOBALS : +@CALLS : +@CREATED : +@MODIFIED : +-------------------------------------------------------------------------- */ +ushort bt_error_status (int *saved_counts) +{ + int i; + ushort status; + + status = 0; + + if (saved_counts) + { + for (i = 0; i < NUM_ERRCLASSES; i++) + status |= ( (errclass_counts[i] > saved_counts[i]) << i); + } + else + { + for (i = 0; i < NUM_ERRCLASSES; i++) + status |= ( (errclass_counts[i] > 0) << i); + } + + return status; +} /* bt_error_status () */ diff --git a/src/translators/btparse/error.h b/src/translators/btparse/error.h new file mode 100644 index 0000000..aede151 --- /dev/null +++ b/src/translators/btparse/error.h @@ -0,0 +1,65 @@ +/* ------------------------------------------------------------------------ +@NAME : error.c +@DESCRIPTION: Prototypes for the error-generating functions (i.e. functions + defined in error.c, and meant only for use elswhere in the + library). 
+@CREATED : Summer 1996, Greg Ward +@MODIFIED : +@VERSION : $Id: error.h,v 1.11 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +#ifndef ERROR_H +#define ERROR_H + +#include <stdarg.h> +#include "btparse.h" /* for AST typedef */ + +/* Size of the formatted-message buffer in error.c, not counting the terminating NUL. */ +#define MAX_ERROR 1024 + +/* Function body shared by the variadic error wrappers: collects the arguments that follow `format' into a va_list and forwards everything to report_error(). `format' must name the last fixed parameter of the enclosing function. */ +#define ERRFUNC_BODY(class,filename,line,item_desc,item,format) \ +{ \ + va_list arglist; \ + \ + va_start (arglist, format); \ + report_error (class, filename, line, item_desc, item, format, arglist); \ + va_end (arglist); \ +} + +/* Defines a void function `name' with parameter list `params' whose body is ERRFUNC_BODY; the remaining arguments are the expressions passed on to report_error(). */ +#define GEN_ERRFUNC(name,params,class,filename,line,item_desc,item,format) \ +void name params \ +ERRFUNC_BODY (class, filename, line, item_desc, item, format) + +/* Same as GEN_ERRFUNC, but the generated function is static. */ +#define GEN_PRIVATE_ERRFUNC(name,params, \ + class,filename,line,item_desc,item,format) \ +static GEN_ERRFUNC(name,params,class,filename,line,item_desc,item,format) + +/* + * Prototypes for functions exported by error.c but only used within + * the library -- functions that can be called by outsiders are declared + * in btparse.h. 
+ */ + +void print_error (bt_error *err); +void report_error (bt_errclass class, + char * filename, int line, const char * item_desc, int item, + const char * format, va_list arglist); + +void general_error (bt_errclass class, + char * filename, int line, const char * item_desc, int item, + char * format, ...); +void error (bt_errclass class, char * filename, int line, char * format, ...); +void ast_error (bt_errclass class, AST * ast, char * format, ...); + +void notify (const char *format,...); +void usage_warning (const char * format, ...); +void usage_error (const char * format, ...); +void internal_error (const char * format, ...); + +#endif diff --git a/src/translators/btparse/format_name.c b/src/translators/btparse/format_name.c new file mode 100644 index 0000000..d6c99ae --- /dev/null +++ b/src/translators/btparse/format_name.c @@ -0,0 +1,841 @@ +/* ------------------------------------------------------------------------ +@NAME : format_name.c +@DESCRIPTION: bt_format_name() and support functions: everything needed + to turn a bt_name structure (as returned by bt_split_name()) + back into a string according to a highly customizable format. +@GLOBALS : +@CREATED : +@MODIFIED : +@VERSION : $Id: format_name.c,v 1.12 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "btparse.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ +#include "bt_debug.h" + + +static char EmptyString[] = ""; + + +#if DEBUG +/* prototypes to shut "gcc -Wmissing-prototypes" up */ +void print_tokens (char *partname, char **tokens, int num_tokens); +void dump_name (bt_name * name); +void dump_format (bt_name_format * format); +#endif + + +/* ---------------------------------------------------------------------- + * Interface to create/customize bt_name_format structures + */ + +/* ------------------------------------------------------------------------ +@NAME : bt_create_name_format +@INPUT : parts - a string of letters (maximum four, from the set + f, v, l, j, with no repetition) denoting the order + and presence of name parts. Also used to determine + certain pre-part text strings. + abbrev_first - flag: should first names be abbreviated? +@OUTPUT : +@RETURNS : +@DESCRIPTION: Creates a bt_name_format structure, slightly customized + according to the caller's choice of token order and + whether to abbreviate the first name. Use + bt_free_name_format() to free the structure (and any sub- + structures that may be allocated here). Use + bt_set_format_text() and bt_set_format_options() for + further customization of the format structure; do not + fiddle its fields directly. + + Fills in the structures `parts' field according to `parts' + string: 'f' -> BTN_FIRST, and so on. + + Sets token join methods: inter-token join (within each part) + is set to BTJ_MAYTIE (a "discretionary tie") for all parts; + inter-part join is set to BTJ_SPACE, except for a 'von' + token immediately preceding a 'last' token; there, we have + a discretionary tie. + + Sets abbreviation flags: FALSE for everything except `first', + which follows `abbrev_first' argument. 
+ + Sets surrounding text (pre- and post-part, pre- and post- + token): empty string for everything, except: + - post-token for 'first' is "." if abbrev_first true + - if 'jr' immediately preceded by 'last': + pre-part for 'jr' is ", ", join for 'last' is nothing + - if 'first' immediately preceded by 'last' + pre-part for 'first' is ", " , join for 'last' is nothing + - if 'first' immediately preceded by 'jr' and 'jr' immediately + preceded by 'last': + pre-part for 'first' and 'jr' is ", " , + join for 'last' and 'jr' is nothing +@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +bt_name_format * +bt_create_name_format (char * parts, boolean abbrev_first) +{ + int num_parts; + int num_valid_parts; + bt_name_format * + format; + int part_pos[BT_MAX_NAMEPARTS]; + int i; + + /* + * Check that the part list (a string with one letter -- f, v, l, or j + * -- for each part is valid: no longer than four characters, and no + * invalid characters. 
+ */ + + num_parts = strlen (parts); + num_valid_parts = strspn (parts, BT_VALID_NAMEPARTS); + if (num_parts > BT_MAX_NAMEPARTS) + { + usage_error ("bt_create_name_format: part list must have no more than " + "%d letters", BT_MAX_NAMEPARTS); + } + if (num_valid_parts != num_parts) + { + usage_error ("bt_create_name_format: bad part abbreviation \"%c\" " + "(must be one of \"%s\")", + parts[num_valid_parts], BT_VALID_NAMEPARTS); + } + + + /* User input is OK -- let's create the structure */ + + format = (bt_name_format *) malloc (sizeof (bt_name_format)); + format->num_parts = num_parts; + for (i = 0; i < num_parts; i++) + { + switch (parts[i]) + { + case 'f': format->parts[i] = BTN_FIRST; break; + case 'v': format->parts[i] = BTN_VON; break; + case 'l': format->parts[i] = BTN_LAST; break; + case 'j': format->parts[i] = BTN_JR; break; + default: internal_error ("bad part abbreviation \"%c\"", parts[i]); + } + part_pos[format->parts[i]] = i; + } + for (; i < BT_MAX_NAMEPARTS; i++) + { + format->parts[i] = BTN_NONE; + } + + + /* + * Set the token join methods: between tokens for all parts is a + * discretionary tie, and the join between parts is a space (except for + * 'von': if followed by 'last', we will have a discretionary tie). + */ + for (i = 0; i < num_parts; i++) + { + format->join_tokens[i] = BTJ_MAYTIE; + format->join_part[i] = BTJ_SPACE; + } + if (part_pos[BTN_VON] + 1 == part_pos[BTN_LAST]) + format->join_part[BTN_VON] = BTJ_MAYTIE; + + + /* + * Now the abbreviation flags: follow 'abbrev_first' flag for 'first', + * and FALSE for everything else. 
+ */ + format->abbrev[BTN_FIRST] = abbrev_first; + format->abbrev[BTN_VON] = FALSE; + format->abbrev[BTN_LAST] = FALSE; + format->abbrev[BTN_JR] = FALSE; + + + + /* + * Now fill in the "surrounding text" fields (pre- and post-part, pre- + * and post-token) -- start out with everything NULL (empty string), + * and then tweak it to handle abbreviated first names, 'jr' following + * 'last', and 'first' following 'last' or 'last' and 'jr'. In the + * last three cases, we put in some pre-part text (", "), and also + * set the join method for the *previous* part (jr or last) to + * BTJ_NOTHING, so we don't get extraneous space before the ", ". + */ + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + format->pre_part[i] = EmptyString; + format->post_part[i] = EmptyString; + format->pre_token[i] = EmptyString; + format->post_token[i] = EmptyString; + } + + /* abbreviated first name: + * "Blow J" -> "Blow J.", or "J Blow" -> "J. Blow" + */ + if (abbrev_first) + { + format->post_token[BTN_FIRST] = "."; + } + /* 'jr' after 'last': "Joe Blow Jr." -> "Joe Blow, Jr." */ + if (part_pos[BTN_JR] == part_pos[BTN_LAST]+1) + { + format->pre_part[BTN_JR] = ", "; + format->join_part[BTN_LAST] = BTJ_NOTHING; + /* 'first' after 'last' and 'jr': "Blow, Jr. 
Joe"->"Blow, Jr., Joe" */ + if (part_pos[BTN_FIRST] == part_pos[BTN_JR]+1) + { + format->pre_part[BTN_FIRST] = ", "; + format->join_part[BTN_JR] = BTJ_NOTHING; + } + } + /* first after last: "Blow Joe" -> "Blow, Joe" */ + if (part_pos[BTN_FIRST] == part_pos[BTN_LAST]+1) + { + format->pre_part[BTN_FIRST] = ", "; + format->join_part[BTN_LAST] = BTJ_NOTHING; + } + + DBG_ACTION + (1, printf ("bt_create_name_format(): returning structure %p\n", format)) + + return format; + +} /* bt_create_name_format() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_name_format() +@INPUT : format - free()'d, so this is an invalid pointer after the call +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees a bt_name_format structure created by + bt_create_name_format(). +@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_free_name_format (bt_name_format * format) +{ + free (format); +} + + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_format_text +@INPUT : format - the format structure to update + part - which name-part to change the surrounding text for + pre_part - "pre-part" text, or NULL to leave alone + post_part - "post-part" text, or NULL to leave alone + pre_token - "pre-token" text, or NULL to leave alone + post_token - "post-token" text, or NULL to leave alone +@OUTPUT : format - pre_part, post_part, pre_token, post_token + arrays updated (only those with corresponding + non-NULL parameters are touched) +@RETURNS : +@DESCRIPTION: Sets the "surrounding text" for a particular name part in + a name format structure. 
+@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_set_format_text (bt_name_format * format, + bt_namepart part, + char * pre_part, + char * post_part, + char * pre_token, + char * post_token) +{ + if (pre_part) format->pre_part[part] = pre_part; + if (post_part) format->post_part[part] = post_part; + if (pre_token) format->pre_token[part] = pre_token; + if (post_token) format->post_token[part] = post_token; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_format_options() +@INPUT : format + part + abbrev + join_tokens + join_part +@OUTPUT : format - abbrev, join_tokens, join_part arrays all updated +@RETURNS : +@DESCRIPTION: Sets various formatting options for a particular name part in + a name format structure. +@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_set_format_options (bt_name_format * format, + bt_namepart part, + boolean abbrev, + bt_joinmethod join_tokens, + bt_joinmethod join_part) +{ + format->abbrev[part] = abbrev; + format->join_tokens[part] = join_tokens; + format->join_part[part] = join_part; +} + + + +/* ---------------------------------------------------------------------- + * Functions for actually formatting a name (given a name and a name + * format structure). + */ + +/* ------------------------------------------------------------------------ +@NAME : count_virtual_char() +@INPUT : string + offset +@OUTPUT : vchar_count +@INOUT : depth + in_special +@RETURNS : +@DESCRIPTION: Munches a single physical character from a string, updating + the virtual character count, the depth, and an "in special + character" flag. + + The virtual character count is incremented by any character + not part of a special character, and also by the right-brace + that closes a special character. 
The depth is incremented by + a left brace, and decremented by a right brace. in_special + is set to TRUE when we encounter a left brace at depth zero + that is immediately followed by a backslash; it is set to + false when we encounter the end of the special character, + i.e. when in_special is TRUE and we hit a right brace that + brings us back to depth zero. + + *vchar_count and *depth should both be set to zero the first + time you call count_virtual_char() on a particular string, + and in_special should be set to FALSE. +@CALLS : +@CALLERS : string_length() + string_prefix() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +count_virtual_char (char * string, + int offset, + int * vchar_count, + int * depth, + boolean * in_special) +{ + switch (string[offset]) + { + case '{': + { + /* start of a special char? */ + if (*depth == 0 && string[offset+1] == '\\') + *in_special = TRUE; + (*depth)++; + break; + } + case '}': + { + /* end of a special char? */ + if (*depth == 1 && *in_special) + { + *in_special = FALSE; + (*vchar_count)++; + } + (*depth)--; + break; + } + default: + { + /* anything else? (possibly inside a special char) */ + if (! *in_special) (*vchar_count)++; + } + } +} /* count_virtual_char () */ + + +/* this should probably be publicly available, documented, etc. */ +/* ------------------------------------------------------------------------ +@NAME : string_length() +@INPUT : string +@OUTPUT : +@RETURNS : "virtual length" of `string' +@DESCRIPTION: Counts the number of "virtual characters" in a string. A + virtual character is either an entire BibTeX special character, + or any character outside of a special character. + + Thus, "Hello" has virtual length 5, and so does + "H{\\'e}ll{\\\"o}". "{\\noop Hello there how are you?}" has + virtual length one. 
+@CALLS : count_virtual_char() +@CALLERS : format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +string_length (char * string) +{ + int length; + int depth; + boolean in_special; + int i; + + length = 0; + depth = 0; + in_special = FALSE; + + for (i = 0; string[i] != 0; i++) + { + count_virtual_char (string, i, &length, &depth, &in_special); + } + + return length; +} /* string_length() */ + + +/* ------------------------------------------------------------------------ +@NAME : string_prefix() +@INPUT : string + prefix_len +@OUTPUT : +@RETURNS : physical length of the prefix of `string' with a virtual length + of `prefix_len' +@DESCRIPTION: Counts the number of physical characters from the beginning + of `string' needed to extract a sub-string with virtual + length `prefix_len'. +@CALLS : count_virtual_char() +@CALLERS : format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +string_prefix (char * string, int prefix_len) +{ + int i; + int vchars_seen; + int depth; + boolean in_special; + + vchars_seen = 0; + depth = 0; + in_special = FALSE; + + for (i = 0; string[i] != 0; i++) + { + count_virtual_char (string, i, &vchars_seen, &depth, &in_special); + if (vchars_seen == prefix_len) + return i+1; + } + + return i; + +} /* string_prefix() */ + + +/* ------------------------------------------------------------------------ +@NAME : append_text() +@INOUT : string +@INPUT : offset + text + start + len +@OUTPUT : +@RETURNS : number of characters copied from text+start to string+offset +@DESCRIPTION: Copies at most `len' characters from text+start to + string+offset. (I don't use strcpy() or strncpy() for this + because I need to get the number of characters actually + copied.) 
/* ------------------------------------------------------------------------
@NAME       : append_text()
@INOUT      : string - destination buffer; must have room for the copied
                       characters starting at `offset'
@INPUT      : offset - index in `string' where copying starts
              text   - source string (may be NULL, in which case nothing
                       is copied)
              start  - index in `text' of the first character to copy
              len    - maximum number of characters to copy; any negative
                       value means "no limit" (copy up to the end of `text')
@OUTPUT     :
@RETURNS    : number of characters copied from text+start to string+offset
@DESCRIPTION: Copies at most `len' characters from text+start to
              string+offset.  The copy stops early at the end of `text'.
              No terminating NUL is written to `string'.  (I don't use
              strcpy() or strncpy() for this because I need to get the
              number of characters actually copied.)
@CALLS      :
@CALLERS    : format_name()
@CREATED    : 1997/11/03, GPW
@MODIFIED   : a `len' of zero now copies nothing, as documented, instead
              of being treated like "no limit"
-------------------------------------------------------------------------- */
static int
append_text (char *       string,
             int          offset,
             const char * text,
             int          start,
             int          len)
{
   int i;

   if (text == NULL) return 0;          /* no text -- none appended! */

   for (i = 0; text[start+i] != '\0'; i++)
   {
      if (len >= 0 && i >= len)         /* limit reached (negative */
         break;                         /* len means unlimited)    */
      string[offset+i] = text[start+i];
   } /* for i */

   return i;                            /* number of characters copied */

} /* append_text () */


/* ------------------------------------------------------------------------
@NAME       : append_join
@INOUT      : string
@INPUT      : offset
              method
              should_tie
@OUTPUT     :
@RETURNS    : number of characters appended to string+offset (either 0 or 1)
@DESCRIPTION: Copies a "join character" ('~' or ' ') or nothing to
              string+offset, according to the join method specified by
              `method' and the `should_tie' flag.

              Specifically: if `method' is BTJ_SPACE, a space is appended
              and 1 is returned; if `method' is BTJ_FORCETIE, a TeX "tie"
              character ('~') is appended and 1 is returned.  If `method'
              is BTJ_NOTHING, `string' is unchanged and 0 is returned.  If
              `method' is BTJ_MAYTIE then either a tie (if should_tie is
              true) or a space (otherwise) is appended, and 1 is returned.
@CALLS      :
@CALLERS    : format_name()
@CREATED    : 1997/11/03, GPW
@MODIFIED   :
@COMMENTS   : This should allow "tie" strings other than TeX's '~' -- I
              think this could be done by putting a "tie string" field in
              the name format structure, and using it here.
*/
+-------------------------------------------------------------------------- */ +static int +append_join (char * string, + int offset, + bt_joinmethod method, + boolean should_tie) +{ + switch (method) + { + case BTJ_MAYTIE: /* a "discretionary tie" -- pay */ + { /* attention to should_tie */ + if (should_tie) + string[offset] = '~'; + else + string[offset] = ' '; + return 1; + } + case BTJ_SPACE: + { + string[offset] = ' '; + return 1; + } + case BTJ_FORCETIE: + { + string[offset] = '~'; + return 1; + } + case BTJ_NOTHING: + { + return 0; + } + default: + internal_error ("bad token join method %d", (int) method); + } + + return 0; /* can't happen -- just here to */ + /* keep gcc -Wall happy */ +} /* append_join () */ + + +#define STRLEN(s) (s == NULL) ? 0 : strlen (s) + +/* ------------------------------------------------------------------------ +@NAME : format_firstpass() +@INPUT : name + format +@OUTPUT : +@RETURNS : +@DESCRIPTION: Makes the first pass over a name for formatting, in order to + establish an upper bound on the length of the formatted name. 
+@CALLS : +@CALLERS : bt_format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static unsigned +format_firstpass (bt_name * name, + bt_name_format * format) +{ + int i; /* loop over parts */ + int j; /* loop over tokens */ + unsigned max_length; + bt_namepart part; + char ** tok; + int num_tok; + + max_length = 0; + + for (i = 0; i < format->num_parts; i++) + { + part = format->parts[i]; /* 'cause I'm a lazy typist */ + tok = name->parts[part]; + num_tok = name->part_len[part]; + + assert ((tok != NULL) == (num_tok > 0)); + if (tok) + { + max_length += STRLEN (format->pre_part[part]); + max_length += STRLEN (format->post_part[part]); + max_length += STRLEN (format->pre_token[part]) * num_tok; + max_length += STRLEN (format->post_token[part]) * num_tok; + max_length += num_tok + 1; /* one join char per token, plus */ + /* join char to next part */ + + /* + * We ignore abbreviation here -- just overestimates the maximum + * length, so no big deal. Also saves us the bother of computing + * the physical length of the prefix of virtual length 1. + */ + for (j = 0; j < num_tok; j++) + max_length += strlen (tok[j]); + } + + } /* for i (loop over parts) */ + + return max_length; + +} /* format_firstpass() */ + + +/* ------------------------------------------------------------------------ +@NAME : format_name() +@INPUT : format + tokens - token list (eg. from format_firstpass()) + num_tokens - token count list (eg. from format_firstpass()) +@OUTPUT : fname - filled in, must be preallocated by caller +@RETURNS : +@DESCRIPTION: Performs the second pass over a name and format, to actually + put the name into a single string according to `format'. 
+@CALLS : +@CALLERS : bt_format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +format_name (bt_name_format * format, + char *** tokens, + int * num_tokens, + char * fname) +{ + bt_namepart parts[BT_MAX_NAMEPARTS]; /* culled list from format */ + int num_parts; + + int offset; /* into fname */ + int i; /* loop over parts */ + int j; /* loop over tokens */ + bt_namepart part; + int prefix_len; + int token_len; /* "physical" length (characters) */ + int token_vlen; /* "virtual" length (special char */ + /* counts as one character) */ + boolean should_tie; + + /* + * Cull format->parts down by keeping only those parts that are actually + * present in the current name (keeps the main loop simpler: makes it + * easy to know if the "next part" is present or not, so we know whether + * to append a join character. + */ + num_parts = 0; + for (i = 0; i < format->num_parts; i++) + { + part = format->parts[i]; + if (tokens[part]) /* name actually has this part */ + parts[num_parts++] = part; + } + + offset = 0; + token_vlen = -1; /* sanity check, and keeps */ + /* "gcc -O -Wall" happy */ + + for (i = 0; i < num_parts; i++) + { + part = parts[i]; + + offset += append_text (fname, offset, + format->pre_part[part], 0, -1); + + for (j = 0; j < num_tokens[part]; j++) + { + offset += append_text (fname, offset, + format->pre_token[part], 0, -1); + if (format->abbrev[part]) + { + prefix_len = string_prefix (tokens[part][j], 1); + token_len = append_text (fname, offset, + tokens[part][j], 0, prefix_len); + token_vlen = 1; + } + else + { + token_len = append_text (fname, offset, + tokens[part][j], 0, -1); + token_vlen = string_length (tokens[part][j]); + } + offset += token_len; + offset += append_text (fname, offset, + format->post_token[part], 0, -1); + + /* join to next token, but only if there is a next token! 
*/ + if (j < num_tokens[part]-1) + { + should_tie = (num_tokens[part] > 1) + && (((j == 0) && (token_vlen < 3)) + || (j == num_tokens[part]-2)); + offset += append_join (fname, offset, + format->join_tokens[part], should_tie); + } + + } /* for j */ + + offset += append_text (fname, offset, + format->post_part[part], 0, -1); + /* join to the next part, but again only if there is a next part */ + if (i < num_parts-1) + { + if (token_vlen == -1) + { + internal_error ("token_vlen uninitialized -- no tokens in a part " + "that I checked existed"); + } + should_tie = (num_tokens[part] == 1 && token_vlen < 3); + offset += append_join (fname, offset, + format->join_part[part], should_tie); + } + + } /* for i (loop over parts) */ + + fname[offset] = 0; + +} /* format_name () */ + + +#if DEBUG + +#define STATIC /* so BibTeX.xs can call 'em too */ + +/* borrowed print_tokens() and dump_name() from t/name_test.c */ +STATIC void +print_tokens (char *partname, char **tokens, int num_tokens) +{ + int i; + + if (tokens) + { + printf ("%s = (", partname); + for (i = 0; i < num_tokens; i++) + { + printf ("%s%c", tokens[i], i == num_tokens-1 ? 
')' : '|'); + } + putchar ('\n'); + } +} + + +STATIC void +dump_name (bt_name * name) +{ + if (name == NULL) + { + printf (" name: null\n"); + return; + } + + if (name->tokens == NULL) + { + printf (" name: null token list\n"); + return; + } + + printf (" name (%p):\n", name); + printf (" total number of tokens = %d\n", name->tokens->num_items); + print_tokens (" first", name->parts[BTN_FIRST], name->part_len[BTN_FIRST]); + print_tokens (" von", name->parts[BTN_VON], name->part_len[BTN_VON]); + print_tokens (" last", name->parts[BTN_LAST], name->part_len[BTN_LAST]); + print_tokens (" jr", name->parts[BTN_JR], name->part_len[BTN_JR]); +} + + +STATIC void +dump_format (bt_name_format * format) +{ + int i; + static char * nameparts[] = { "first", "von", "last", "jr" }; + static char * joinmethods[] = {"may tie", "space", "force tie", "nothing"}; + + printf (" name format (%p):\n", format); + printf (" order:"); + for (i = 0; i < format->num_parts; i++) + printf (" %s", nameparts[format->parts[i]]); + printf ("\n"); + + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + printf (" %-5s: pre-part=%p (%s), post-part=%p (%s)\n", + nameparts[i], + format->pre_part[i], format->pre_part[i], + format->post_part[i], format->post_part[i]); + printf (" %-5s pre-token=%p (%s), post-token=%p (%s)\n", + "", + format->pre_token[i], format->pre_token[i], + format->post_token[i],format->post_token[i]); + printf (" %-5s abbrev=%s, join_tokens=%s, join_parts=%s\n", + "", + format->abbrev[i] ? "yes" : "no", + joinmethods[format->join_tokens[i]], + joinmethods[format->join_part[i]]); + } +} +#endif + + +/* ------------------------------------------------------------------------ +@NAME : bt_format_name() +@INPUT : name + format +@OUTPUT : +@RETURNS : formatted name (allocated with malloc(); caller must free() it) +@DESCRIPTION: Formats an already-split name according to a pre-constructed + format structure. 
+@GLOBALS : +@CALLS : format_firstpass(), format_name() +@CALLERS : +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +char * +bt_format_name (bt_name * name, + bt_name_format * format) +{ + unsigned max_length; + char * fname; + +#if DEBUG >= 2 + printf ("bt_format_name():\n"); + dump_name (name); + dump_format (format); +#endif + + max_length = format_firstpass (name, format); + fname = (char *) malloc ((max_length+1) * sizeof (char)); +#if 0 + memset (fname, '_', max_length); + fname[max_length] = 0; +#endif + format_name (format, name->parts, name->part_len, fname); + assert (strlen (fname) <= max_length); + return fname; + +} /* bt_format_name() */ diff --git a/src/translators/btparse/init.c b/src/translators/btparse/init.c new file mode 100644 index 0000000..4a1ec06 --- /dev/null +++ b/src/translators/btparse/init.c @@ -0,0 +1,42 @@ +/* ------------------------------------------------------------------------ +@NAME : init.c +@DESCRIPTION: Initialization and cleanup functions for the btparse library. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/19, Greg Ward +@MODIFIED : +@VERSION : $Id: init.c,v 1.8 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include "stdpccts.h" /* for zzfree_ast() prototype */ +#include "parse_auxiliary.h" /* for fix_token_names() proto */ +#include "prototypes.h" /* for other prototypes */ +/*#include "my_dmalloc.h"*/ + +void bt_initialize (void) +{ + /* Initialize data structures */ + + fix_token_names (); + init_macros (); +} + + +void bt_free_ast (AST *ast) +{ + zzfree_ast (ast); +} + + +void bt_cleanup (void) +{ + done_macros (); +} diff --git a/src/translators/btparse/input.c b/src/translators/btparse/input.c new file mode 100644 index 0000000..dbb7b44 --- /dev/null +++ b/src/translators/btparse/input.c @@ -0,0 +1,499 @@ +/* ------------------------------------------------------------------------ +@NAME : input.c +@DESCRIPTION: Routines for input of BibTeX data. +@GLOBALS : InputFilename + StringOptions +@CALLS : +@CREATED : 1997/10/14, Greg Ward (from code in bibparse.c) +@MODIFIED : +@VERSION : $Id: input.c,v 1.18 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include <assert.h> +#include "stdpccts.h" +#include "lex_auxiliary.h" +#include "prototypes.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + + +char * InputFilename; +ushort StringOptions[NUM_METATYPES] = +{ + 0, /* BTE_UNKNOWN */ + BTO_FULL, /* BTE_REGULAR */ + BTO_MINIMAL, /* BTE_COMMENT */ + BTO_MINIMAL, /* BTE_PREAMBLE */ + BTO_MACRO /* BTE_MACRODEF */ +}; + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_filename +@INPUT : filename +@OUTPUT : +@RETURNS : +@DESCRIPTION: Sets the current input filename -- used for generating + error and warning messages. +@GLOBALS : InputFilename +@CALLS : +@CREATED : Feb 1997, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +#if 0 +void bt_set_filename (char *filename) +{ + InputFilename = filename; +} +#endif + +/* ------------------------------------------------------------------------ +@NAME : bt_set_stringopts +@INPUT : metatype + options +@OUTPUT : +@RETURNS : +@DESCRIPTION: Sets the string-processing options for a particular + entry metatype. Used later on by bt_parse_* to determine + just how to post-process each particular entry. 
+@GLOBALS : StringOptions +@CREATED : 1997/08/24, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_set_stringopts (bt_metatype metatype, ushort options) +{ + if (metatype < BTE_REGULAR || metatype > BTE_MACRODEF) + usage_error ("bt_set_stringopts: illegal metatype"); + if (options & ~BTO_STRINGMASK) + usage_error ("bt_set_stringopts: illegal options " + "(must only set string option bits"); + + StringOptions[metatype] = options; +} + + +/* ------------------------------------------------------------------------ +@NAME : start_parse +@INPUT : infile input stream we'll read from (or NULL if reading + from string) + instring input string we'll read from (or NULL if reading + from stream) + line line number of the start of the string (just + use 1 if the string is standalone and independent; + if it comes from a file, you should supply the + line number where it starts for better error + messages) (ignored if infile != NULL) +@OUTPUT : +@RETURNS : +@DESCRIPTION: Prepares things for parsing, in particular initializes the + lexical state and lexical buffer, prepares DLG for + reading (either from a stream or a string), and reads + the first token. 
+@GLOBALS : +@CALLS : initialize_lexer_state() + alloc_lex_buffer() + zzrdstream() or zzrdstr() + zzgettok() +@CALLERS : +@CREATED : 1997/06/21, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +start_parse (FILE *infile, char *instring, int line) +{ + if ( (infile == NULL) == (instring == NULL) ) + { + internal_error ("start_parse(): exactly one of infile and " + "instring may be non-NULL"); + } + initialize_lexer_state (); + alloc_lex_buffer (ZZLEXBUFSIZE); + if (infile) + { + zzrdstream (infile); + } + else + { + zzrdstr (instring); + zzline = line; + } + + zzendcol = zzbegcol = 0; + zzgettok (); +} + + + +/* ------------------------------------------------------------------------ +@NAME : finish_parse() +@INPUT : err_counts - pointer to error count list (which is local to + the parsing functions, hence has to be passed in) +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees up what was needed to parse a whole file or a sequence + of strings: the lexical buffer and the error count list. +@GLOBALS : +@CALLS : free_lex_buffer() +@CALLERS : +@CREATED : 1997/06/21, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +finish_parse (int **err_counts) +{ + free_lex_buffer (); + free (*err_counts); + *err_counts = NULL; +} + + +/* ------------------------------------------------------------------------ +@NAME : parse_status() +@INPUT : saved_counts +@OUTPUT : +@RETURNS : false if there were serious errors in the recently-parsed input + true otherwise (no errors or just warnings) +@DESCRIPTION: Gets the "error status" bitmap relative to a saved set of + error counts and masks of non-serious errors. 
+@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/06/21, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static boolean +parse_status (int *saved_counts) +{ + ushort ignore_emask; + + /* + * This bit-twiddling fetches the error status (which has a bit + * for each error class), masks off the bits for trivial errors + * to get "true" if there were any serious errors, and then + * returns the opposite of that. + */ + ignore_emask = + (1<<BTERR_NOTIFY) | (1<<BTERR_CONTENT) | (1<<BTERR_LEXWARN); + return !(bt_error_status (saved_counts) & ~ignore_emask); +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_parse_entry_s() +@INPUT : entry_text - string containing the entire entry to parse, + or NULL meaning we're done, please cleanup + options - standard btparse options bitmap + line - current line number (if that makes any sense) + -- passed to the parser to set zzline, so that + lexical and syntax errors are properly localized +@OUTPUT : *top - newly-allocated AST for the entry + (or NULL if entry_text was NULL, ie. at EOF) +@RETURNS : 1 with *top set to AST for entry on successful read/parse + 1 with *top==NULL if entry_text was NULL, ie. at EOF + 0 if any serious errors seen in input (*top is still + set to the AST, but only for as much of the input as we + were able to parse) + (A "serious" error is a lexical or syntax error; "trivial" + errors such as warnings and notifications count as "success" + for the purposes of this function's return value.) +@DESCRIPTION: Parses a BibTeX entry contained in a string. 
+@GLOBALS : +@CALLS : ANTLR +@CREATED : 1997/01/18, GPW (from code in bt_parse_entry()) +@MODIFIED : +-------------------------------------------------------------------------- */ +AST * bt_parse_entry_s (char * entry_text, + char * filename, + int line, + ushort options, + boolean * status) +{ + AST * entry_ast = NULL; + static int * err_counts = NULL; + + if (options & BTO_STRINGMASK) /* any string options set? */ + { + usage_error ("bt_parse_entry_s: illegal options " + "(string options not allowed"); + } + + InputFilename = filename; + err_counts = bt_get_error_counts (err_counts); + + if (entry_text == NULL) /* signal to clean up */ + { + finish_parse (&err_counts); + if (status) *status = TRUE; + return NULL; + } + + zzast_sp = ZZAST_STACKSIZE; /* workaround apparent pccts bug */ + start_parse (NULL, entry_text, line); + + entry (&entry_ast); /* enter the parser */ + ++zzasp; /* why is this done? */ + + if (entry_ast == NULL) /* can happen with very bad input */ + { + if (status) *status = FALSE; + return entry_ast; + } + +#if DEBUG + dump_ast ("bt_parse_entry_s: single entry, after parsing:\n", + entry_ast); +#endif + bt_postprocess_entry (entry_ast, + StringOptions[entry_ast->metatype] | options); +#if DEBUG + dump_ast ("bt_parse_entry_s: single entry, after post-processing:\n", + entry_ast); +#endif + + if (status) *status = parse_status (err_counts); + return entry_ast; + +} /* bt_parse_entry_s () */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_parse_entry() +@INPUT : infile - file to read next entry from + options - standard btparse options bitmap +@OUTPUT : *top - AST for the entry, or NULL if no entries left in file +@RETURNS : same as bt_parse_entry_s() +@DESCRIPTION: Starts (or continues) parsing from a file. 
+@GLOBALS : +@CALLS : +@CREATED : Jan 1997, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +AST * bt_parse_entry (FILE * infile, + char * filename, + ushort options, + boolean * status) +{ + AST * entry_ast = NULL; + static int * err_counts = NULL; + static FILE * prev_file = NULL; + + if (prev_file != NULL && infile != prev_file) + { + usage_error ("bt_parse_entry: you can't interleave calls " + "across different files"); + } + + if (options & BTO_STRINGMASK) /* any string options set? */ + { + usage_error ("bt_parse_entry: illegal options " + "(string options not allowed)"); + } + + InputFilename = filename; + err_counts = bt_get_error_counts (err_counts); + + if (feof (infile)) + { + if (prev_file != NULL) /* haven't already done the cleanup */ + { + prev_file = NULL; + finish_parse (&err_counts); + } + else + { + usage_warning ("bt_parse_entry: second attempt to read past eof"); + } + + if (status) *status = TRUE; + return NULL; + } + + /* + * Here we do some nasty poking about the innards of PCCTS in order to + * enter the parser multiple times on the same input stream. This code + * comes from expanding the macro invokation: + * + * ANTLR (entry (top), infile); + * + * When LL_K, ZZINF_LOOK, and DEMAND_LOOK are all undefined, this + * ultimately expands to + * + * zzbufsize = ZZLEXBUFSIZE; + * { + * static char zztoktext[ZZLEXBUFSIZE]; + * zzlextext = zztoktext; + * zzrdstream (f); + * zzgettok(); + * } + * entry (top); + * ++zzasp; + * + * (I'm expanding hte zzenterANTLR, zzleaveANTLR, and zzPrimateLookAhead + * macros, but leaving ZZLEXBUFSIZE -- a simple constant -- alone.) 
+ * + * There are two problems with this: 1) zztoktext is a statically + * allocated buffer, and when it overflows we just ignore further + * characters that should belong to that lexeme; and 2) zzrdstream() and + * zzgettok() are called every time we enter the parser, which means the + * token left over from the previous entry will be discarded when we + * parse entries 2 .. N. + * + * I handle the static buffer problem with alloc_lex_buffer() and + * realloc_lex_buffer() (in lex_auxiliary.c), and by rewriting the ZZCOPY + * macro to call realloc_lex_buffer() when overflow is detected. + * + * I handle the extra token-read by hanging on to a static file + * pointer, prev_file, between calls to bt_parse_entry() -- when + * the program starts it is NULL, and we reset it to NULL on + * finishing a file. Thus, any call that is the first on a given + * file will allocate the lexical buffer and read the first token; + * thereafter, we skip those steps, and free the buffer on reaching + * end-of-file. Currently, this method precludes interleaving + * calls to bt_parse_entry() on different files -- perhaps I could + * fix this with the zz{save,restore}_{antlr,dlg}_state() + * functions? + */ + + zzast_sp = ZZAST_STACKSIZE; /* workaround apparent pccts bug */ + +#if defined(LL_K) || defined(ZZINF_LOOK) || defined(DEMAND_LOOK) +# error One of LL_K, ZZINF_LOOK, or DEMAND_LOOK was defined +#endif + if (prev_file == NULL) /* only read from input stream if */ + { /* starting afresh with a file */ + start_parse (infile, NULL, 0); + prev_file = infile; + } + assert (prev_file == infile); + + entry (&entry_ast); /* enter the parser */ + ++zzasp; /* why is this done? 
*/ + + if (entry_ast == NULL) /* can happen with very bad input */ + { + if (status) *status = FALSE; + return entry_ast; + } + +#if DEBUG + dump_ast ("bt_parse_entry(): single entry, after parsing:\n", + entry_ast); +#endif + bt_postprocess_entry (entry_ast, + StringOptions[entry_ast->metatype] | options); +#if DEBUG + dump_ast ("bt_parse_entry(): single entry, after post-processing:\n", + entry_ast); +#endif + + if (status) *status = parse_status (err_counts); + return entry_ast; + +} /* bt_parse_entry() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_parse_file () +@INPUT : filename - name of file to open. If NULL or "-", we read + from stdin rather than opening a new file. + options +@OUTPUT : top +@RETURNS : 0 if any entries in the file had serious errors + 1 if all entries were OK +@DESCRIPTION: Parses an entire BibTeX file, and returns a linked list + of ASTs (or, if you like, a forest) for the entries in it. + (Any entries with serious errors are omitted from the list.) +@GLOBALS : +@CALLS : bt_parse_entry() +@CREATED : 1997/01/18, from process_file() in bibparse.c +@MODIFIED : +@COMMENTS : This function bears a *striking* resemblance to bibparse.c's + process_file(). Eventually, I plan to replace this with + a generalized process_file() that takes a function pointer + to call for each entry. Until I decide on the right interface + for that, though, I'm sticking with this simpler (but possibly + memory-intensive) approach. +-------------------------------------------------------------------------- */ +AST * bt_parse_file (char * filename, + ushort options, + boolean * status) +{ + FILE * infile; + AST * entries, + * cur_entry, + * last; + boolean entry_status, + overall_status; + + if (options & BTO_STRINGMASK) /* any string options set? */ + { + usage_error ("bt_parse_file: illegal options " + "(string options not allowed"); + } + + /* + * If a string was given, and it's *not* "-", then open that filename. 
+ * Otherwise just use stdin. + */ + + if (filename != NULL && strcmp (filename, "-") != 0) + { + InputFilename = filename; + infile = fopen (filename, "r"); + if (infile == NULL) + { + perror (filename); + return 0; + } + } + else + { + InputFilename = "(stdin)"; + infile = stdin; + } + + entries = NULL; + last = NULL; + +#if 1 + /* explicit loop over entries, with junk cleaned out by read_entry () */ + + overall_status = TRUE; /* assume success */ + while ((cur_entry = bt_parse_entry + (infile, InputFilename, options, &entry_status))) + { + overall_status &= entry_status; + if (!entry_status) continue; /* bad entry -- try next one */ + if (!cur_entry) break; /* at eof -- we're done */ + if (last == NULL) /* this is the first entry */ + entries = cur_entry; + else /* have already seen one */ + last->right = cur_entry; + + last = cur_entry; + } + +#else + /* let the PCCTS lexer/parser handle everything */ + + initialize_lexer_state (); + ANTLR (bibfile (top), infile); + +#endif + + fclose (infile); + InputFilename = NULL; + if (status) *status = overall_status; + return entries; + +} /* bt_parse_file() */ diff --git a/src/translators/btparse/lex_auxiliary.c b/src/translators/btparse/lex_auxiliary.c new file mode 100644 index 0000000..8fac463 --- /dev/null +++ b/src/translators/btparse/lex_auxiliary.c @@ -0,0 +1,939 @@ +/* ------------------------------------------------------------------------ +@NAME : lex_auxiliary.c +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: The code and global variables here have three main purposes: + - maintain the lexical buffer (zztoktext, which + traditionally with PCCTS is a static array; I have + changed things so that it's dynamically allocated and + resized on overflow) + - keep track of lexical state that's not handled by PCCTS + code (like "where are we in terms of BibTeX entries?" 
or + "what are the delimiters for the current entry/string?") + - everything called from lexical actions is here, to keep + the grammar file itself neat and clean +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : Greg Ward, 1996/07/25-28 +@MODIFIED : Jan 1997 + Jun 1997 +@VERSION : $Id: lex_auxiliary.c,v 1.31 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#include <assert.h> +#include "lex_auxiliary.h" +#include "stdpccts.h" +#include "error.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + +#define DUPE_TEXT 0 + +extern char * InputFilename; /* from input.c */ + +GEN_PRIVATE_ERRFUNC (lexical_warning, (const char * fmt, ...), + BTERR_LEXWARN, InputFilename, zzline, NULL, -1, fmt) +GEN_PRIVATE_ERRFUNC (lexical_error, (const char * fmt, ...), + BTERR_LEXERR, InputFilename, zzline, NULL, -1, fmt) + + + +/* ---------------------------------------------------------------------- + * Global variables + */ + +/* First, the lexical buffer. 
This is used elsewhere, so can't be static */ +char * zztoktext = NULL; + +/* + * Now, the lexical state -- first, stuff that arises from scanning + * at top-level and the beginnings of entries; + * EntryState: + * toplevel when we start scanning a file, or when we are in in_entry + * mode and see '}' or ')' + * after_at when we are in toplevel mode and see an '@' + * after_type when we are in after_at mode and see a name (!= 'comment') + * in_comment when we are in after_at mode and see a name (== 'comment') + * in_entry when we are in after_type mode and see '{' or '(' + * EntryOpener: + * the character ('(' or '{') which opened the entry currently being + * scanned (we use this to make sure that the entry opener and closer + * match; if not, we issue a warning) + * EntryMetatype: (NB. typedef for bt_metatype is in btparse.h) + * classifies entries according to the syntax we will use to parse them; + * also winds up (after being changed to a bt_nodetype value) in the + * node that roots the entry AST: + * comment - anything between () or {} + * preamble - a single compound value + * string - a list of "name = compound_value" assignments; no key + * alias - a single "name = compound_value" assignment (where + * the compound value in this case is presumably a + * name, rather than a string -- this is not syntactically + * checked though) + * modify, + * entry - a key followed by a list of "name = compound_value" + * assignments + * JunkCount: + * the number of non-whitespace, non-'@' characters seen at toplevel + * between two entries (used to print out a warning when we hit + * the beginning of entry, to help people catch "old style" implicit + * comments + */ +static enum { toplevel, after_at, after_type, in_comment, in_entry } + EntryState; +static char EntryOpener; /* '(' or '{' */ +static bt_metatype + EntryMetatype; +static int JunkCount; /* non-whitespace chars at toplevel */ + +/* + * String state -- these are maintained and used by the functions called + * 
from actions in the string lexer. + * BraceDepth: + * brace depth within a string; we can only end the current string + * when this is zero + * ParenDepth: + * parenthesis depth within a string; needed for @comment entries + * that are paren-delimited (because the comment in that case is + * a paren-delimited string) + * StringOpener: + * similar to EntryOpener, but stronger than merely warning of token + * mismatch -- this determines which character ('"' or '}') can + * actually end the string + * StringStart: + * line on which current string started; if we detect an apparent + * runaway, this is used to report where the runaway started + * ApparentRunaway: + * flags if we have already detected (and warned) that the current + * string appears to be a runaway, so that we don't warn again + * (and again and again and again) + * QuoteWarned: + * flags if we have already warned about seeing a '"' in a string, + * because they tend to come in pairs and one warning per string + * is enough + * + * (See bibtex.g for an explanation of my runaway string detection heuristic.) + */ +static char StringOpener = '\0'; /* '{' or '"' */ +static int BraceDepth; /* depth of brace-nesting */ +static int ParenDepth; /* depth of parenthesis-nesting */ +static int StringStart = -1; /* start line of current string */ +static int ApparentRunaway; /* current string looks like runaway */ +static int QuoteWarned; /* already warned about " in string? 
*/ + + + +/* ---------------------------------------------------------------------- + * Miscellaneous functions: + * lex_info() (handy for debugging) + * zzcr_attr() (called from PCCTS-generated code) + */ + +void lex_info (void) +{ + printf ("LA(1) = \"%s\" token %d, %s\n", LATEXT(1), LA(1), zztokens[LA(1)]); +#ifdef LL_K + printf ("LA(2) = \"%s\" token %d, %s\n", LATEXT(2), LA(2), zztokens[LA(2)]); +#endif +} + + +void zzcr_attr (Attrib *a, int tok, char *txt) +{ + if (tok == STRING) + { + int len = strlen (txt); + + assert ((txt[0] == '{' && txt[len-1] == '}') + || (txt[0] == '"' && txt[len-1] == '"')); + txt[len-1] = (char) 0; /* remove closing quote from string */ + txt++; /* so we'll skip the opening quote */ + } + +#if DUPE_TEXT + a->text = strdup (txt); +#else + a->text = txt; +#endif + a->token = tok; + a->line = zzline; + a->offset = zzbegcol; +#if DEBUG > 1 + dprintf ("zzcr_attr: input txt = %p (%s)\n", txt, txt); + dprintf (" dupe txt = %p (%s)\n", a->text, a->text); +#endif +} + + +#if DUPE_TEXT +void zzd_attr (Attrib *attr) +{ + free (attr->text); +} +#endif + + +/* ---------------------------------------------------------------------- + * Lexical buffer functions: + * alloc_lex_buffer() + * realloc_lex_buffer() + * free_lex_buffer() + * lexer_overflow() + * zzcopy() (only if ZZCOPY_FUNCTION is defined and true) + */ + + +/* + * alloc_lex_buffer() + * + * allocates the lexical buffer with `size' characters. Clears the buffer, + * points zzlextext at it, and sets zzbufsize to `size'. + * + * Does nothing if the buffer is already allocated. 
+ * + * globals: zztoktext, zzlextext, zzbufsize + * callers: bt_parse_entry() (in input.c) + */ +void alloc_lex_buffer (int size) +{ + if (zztoktext == NULL) + { + zztoktext = (char *) malloc (size * sizeof (char)); + memset (zztoktext, 0, size); + zzlextext = zztoktext; + zzbufsize = size; + } +} /* alloc_lex_buffer() */ + + +/* + * realloc_lex_buffer() + * + * Reallocates the lexical buffer -- size is increased by `size_increment' + * characters (which could be negative). Updates all globals that point + * to or into the buffer (zzlextext, zzbegexpr, zzendexpr), as well as + * zztoktext (the buffer itself) zzbufsize (the buffer size). + * + * This is only meant to be called (ultimately) from zzgettok(), part of + * the DLG code. (In fact, zzgettok() invokes the ZZCOPY() macro, which + * calls lexer_overflow() on buffer overflow, which calls + * realloc_lex_buffer(). Whatever.) The `lastpos' and `nextpos' arguments + * correspond, respectively, to a local variable in zzgettok() and a static + * global in dlgauto.h (hence really in scan.c). They both point into + * the lexical buffer, so have to be passed by reference here so that + * we can update them to point into the newly-reallocated buffer. 
+ * + * globals: zztottext, zzbufsize, zzlextext, zzbegexpr, zzendexpr + * callers: lexer_overflow() + */ +static void +realloc_lex_buffer (int size_increment, + unsigned char ** lastpos, + unsigned char ** nextpos) +{ + int beg, end, next; + + if (zztoktext == NULL) + internal_error ("attempt to reallocate unallocated lexical buffer"); + + zztoktext = (char *) realloc (zztoktext, zzbufsize+size_increment); + memset (zztoktext+zzbufsize, 0, size_increment); + zzbufsize += size_increment; + + beg = zzbegexpr - zzlextext; + end = zzendexpr - zzlextext; + next = *nextpos - zzlextext; + zzlextext = zztoktext; + + if (lastpos != NULL) + *lastpos = zzlextext+zzbufsize-1; + zzbegexpr = zzlextext + beg; + zzendexpr = zzlextext + end; + *nextpos = zzlextext + next; + +} /* realloc_lex_buffer() */ + + +/* + * free_lex_buffer() + * + * Frees the lexical buffer allocated by alloc_lex_buffer(). + */ +void free_lex_buffer (void) +{ + if (zztoktext == NULL) + internal_error ("attempt to free unallocated (or already freed) " + "lexical buffer"); + + free (zztoktext); + zztoktext = NULL; +} /* free_lex_buffer() */ + + +/* + * lexer_overflow() + * + * Prints a warning and calls realloc_lex_buffer() to increase the size + * of the lexical buffer by ZZLEXBUFSIZE (a constant -- hence the buffer + * size increases linearly, not exponentially). + * + * Also prints a couple of lines of useful debugging stuff if DEBUG is true. 
+ */ +void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos) +{ +#if DEBUG + char head[16], tail[16]; + + printf ("zzcopy: overflow detected\n"); + printf (" zzbegcol=%d, zzendcol=%d, zzline=%d\n", + zzbegcol, zzendcol, zzline); + strncpy (head, zzlextext, 15); head[15] = 0; + strncpy (tail, zzlextext+ZZLEXBUFSIZE-15, 15); tail[15] = 0; + printf (" zzlextext=>%s...%s< (last char=%d (%c))\n", + head, tail, + zzlextext[ZZLEXBUFSIZE-1], zzlextext[ZZLEXBUFSIZE-1]); + printf (" zzchar = %d (%c), zzbegexpr=zzlextext+%d\n", + zzchar, zzchar, zzbegexpr-zzlextext); +#endif + + notify ("lexical buffer overflowed (reallocating to %d bytes)", + zzbufsize+ZZLEXBUFSIZE); + realloc_lex_buffer (ZZLEXBUFSIZE, lastpos, nextpos); + +} /* lexer_overflow () */ + + +#if ZZCOPY_FUNCTION +/* + * zzcopy() + * + * Does the same as the ZZCOPY macro (in lex_auxiliary.h), but as a + * function for easier debugging. + */ +void zzcopy (char **nextpos, char **lastpos, int *ovf_flag) +{ + if (*nextpos >= *lastpos) + { + lexer_overflow (lastpos, nextpos); + } + + **nextpos = zzchar; + (*nextpos)++; +} +#endif + + + +/* ---------------------------------------------------------------------- + * Report/maintain lexical state + * report_state() (only meaningful if DEBUG) + * initialize_lexer_state() + * + * Note that the lexical action functions, below, also fiddle with + * the lexical state variables an awful lot. 
+ */ + +#if DEBUG +char *state_names[] = + { "toplevel", "after_at", "after_type", "in_comment", "in_entry" }; +char *metatype_names[] = + { "unknown", "comment", "preamble", "string", "alias", "modify", "entry" }; + +static void +report_state (char *where) +{ + printf ("%s: lextext=%s (line %d, offset %d), token=%d, " + "EntryState=%s\n", + where, zzlextext, zzline, zzbegcol, NLA, + state_names[EntryState]); +} +#else +# define report_state(where) +/* +static void +report_state (char *where) { } +*/ +#endif + +void initialize_lexer_state (void) +{ + zzmode (START); + EntryState = toplevel; + EntryOpener = (char) 0; + EntryMetatype = BTE_UNKNOWN; + JunkCount = 0; +} + + +bt_metatype entry_metatype (void) +{ + return EntryMetatype; +} + + + +/* ---------------------------------------------------------------------- + * Lexical actions (START and LEX_ENTRY modes) + */ + +/* + * newline () + * + * Does everything needed to handle newline outside of a quoted string: + * increments line counter and skips the newline. 
+ */ +void newline (void) +{ + zzline++; + zzskip(); +} + + +void comment (void) +{ + zzline++; + zzskip(); +} + + +void at_sign (void) +{ + if (EntryState == toplevel) + { + EntryState = after_at; + zzmode (LEX_ENTRY); + if (JunkCount > 0) + { + lexical_warning ("%d characters of junk seen at toplevel", JunkCount); + JunkCount = 0; + } + } + else + { + /* internal_error ("lexer recognized \"@\" at other than top-level"); */ + lexical_warning ("\"@\" in strange place -- should get syntax error"); + } + report_state ("at_sign"); +} + + +void toplevel_junk (void) +{ + JunkCount += strlen (zzlextext); + zzskip (); +} + + +void name (void) +{ + report_state ("name (pre)"); + + switch (EntryState) + { + case toplevel: + { + internal_error ("junk at toplevel (\"%s\")", zzlextext); + break; + } + case after_at: + { + char * etype = zzlextext; + EntryState = after_type; + + if (strcasecmp (etype, "comment") == 0) + { + EntryMetatype = BTE_COMMENT; + EntryState = in_comment; + } + + else if (strcasecmp (etype, "preamble") == 0) + EntryMetatype = BTE_PREAMBLE; + + else if (strcasecmp (etype, "string") == 0) + EntryMetatype = BTE_MACRODEF; +/* + else if (strcasecmp (etype, "alias") == 0) + EntryMetatype = BTE_ALIAS; + + else if (strcasecmp (etype, "modify") == 0) + EntryMetatype = BTE_MODIFY; +*/ + else + EntryMetatype = BTE_REGULAR; + + break; + } + case after_type: + case in_comment: + case in_entry: + break; /* do nothing */ + } + + report_state ("name (post)"); + +} + + +void lbrace (void) +{ + /* + * Currently takes a restrictive view of "when an lbrace is an entry + * opener" -- ie. *only* after '@name' (as determined by EntryState), + * where name is not 'comment'. This means that lbrace usually + * determines a string (in particular, when it's seen at toplevel -- + * which will happen under certain error situations), which in turn + * means that some unexpected things can become strings (like whole + * entries). 
+ */ + + if (EntryState == in_entry || EntryState == in_comment) + { + start_string ('{'); + } + else if (EntryState == after_type) + { + EntryState = in_entry; + EntryOpener = '{'; + NLA = ENTRY_OPEN; + } + else + { + lexical_warning ("\"{\" in strange place -- should get a syntax error"); + } + + report_state ("lbrace"); +} + + +void rbrace (void) +{ + if (EntryState == in_entry) + { + if (EntryOpener == '(') + lexical_warning ("entry started with \"(\", but ends with \"}\""); + NLA = ENTRY_CLOSE; + initialize_lexer_state (); + } + else + { + lexical_warning ("\"}\" in strange place -- should get a syntax error"); + } + report_state ("rbrace"); +} + + +void lparen (void) +{ + if (EntryState == in_comment) + { + start_string ('('); + } + else if (EntryState == after_type) + { + EntryState = in_entry; + EntryOpener = '('; + } + else + { + lexical_warning ("\"(\" in strange place -- should get a syntax error"); + } + report_state ("lparen"); +} + + +void rparen (void) +{ + if (EntryState == in_entry) + { + if (EntryOpener == '{') + lexical_warning ("entry started with \"{\", but ends with \")\""); + initialize_lexer_state (); + } + else + { + lexical_warning ("\")\" in strange place -- should get a syntax error"); + } + report_state ("rparen"); +} + + +/* ---------------------------------------------------------------------- + * Stuff for processing strings. + */ + + +/* + * start_string () + * + * Called when we see a '{' or '"' in the field data. Records which quote + * character was used, and calls open_brace() to increment the depth + * counter if it was a '{'. Switches to LEX_STRING mode, and tells the + * lexer to continue slurping characters into the same buffer. 
+ */ +void start_string (char start_char) +{ + StringOpener = start_char; + BraceDepth = 0; + ParenDepth = 0; + StringStart = zzline; + ApparentRunaway = 0; + QuoteWarned = 0; + if (start_char == '{') + open_brace (); + if (start_char == '(') + ParenDepth++; + if (start_char == '"' && EntryState == in_comment) + { + lexical_error ("comment entries must be delimited by either braces or parentheses"); + EntryState = toplevel; + zzmode (START); + return; + } + +#ifdef USER_ZZMODE_STACK + if (zzauto != LEX_ENTRY || EntryState != in_entry) +#else + if (EntryState != in_entry && EntryState != in_comment) +#endif + { + lexical_warning ("start of string seen at weird place"); + } + + zzmore (); + zzmode (LEX_STRING); +} + + +/* + * end_string () + * + * Called when we see either a '"' (at depth 0) or '}' (if it brings us + * down to depth 0) in a quoted string. Just makes sure that braces are + * balanced, and then goes back to the LEX_FIELD mode. + */ +void end_string (char end_char) +{ + char match; + +#ifndef ALLOW_WARNINGS + match = (char) 0; /* silence "might be used" */ + /* uninitialized" warning */ +#endif + + switch (end_char) + { + case '}': match = '{'; break; + case ')': match = '('; break; + case '"': match = '"'; break; + default: + internal_error ("end_string(): invalid end_char \"%c\"", end_char); + } + + assert (StringOpener == match); + + /* + * If we're at non-zero BraceDepth, that probably means mismatched braces + * somewhere -- complain about it and reset BraceDepth to minimize future + * confusion. + */ + + if (BraceDepth > 0) + { + lexical_error ("unbalanced braces: too many {'s"); + BraceDepth = 0; + } + + StringOpener = (char) 0; + StringStart = -1; + NLA = STRING; + + if (EntryState == in_comment) + { + int len = strlen (zzlextext); + + /* + * ARG! no, this is wrong -- what if unbalanced braces in the string + * and we try to output put it later? + * + * ARG! 
again, this is no more wrong than when we strip quotes in + * post_parse.c, and blithely assume that we can put them back on + * later for output in BibTeX syntax. Hmmm. + * + * Actually, it looks like this isn't a problem after all: you + * can't have unbalanced braces in a BibTeX string (at least + * not as parsed by btparse). + */ + + if (zzlextext[0] == '(') /* convert to standard quote delims */ + { + zzlextext[ 0] = '{'; + zzlextext[len-1] = '}'; + } + + EntryState = toplevel; + zzmode (START); + } + else + { + zzmode (LEX_ENTRY); + } + + report_state ("string"); +} + + +/* + * open_brace () + * + * Called when we see a '{', either to start a string (in which case + * it's called from start_string()) or inside a string (called directly + * from the lexer). + */ +void open_brace (void) +{ + BraceDepth++; + zzmore (); + report_state ("open_brace"); +} + + +/* + * close_brace () + * + * Called when we see a '}' inside a string. Decrements the depth counter + * and checks to see if we are down to depth 0, in which case the string is + * ended and the current lookahead token is set to STRING. Otherwise, + * just tells the lexer to keep slurping characters into the buffer. + */ +void close_brace (void) +{ + BraceDepth--; + if (StringOpener == '{' && BraceDepth == 0) + { + end_string ('}'); + } + + /* + * This could happen if some bonehead puts an unmatched right-brace + * in a quote-delimited string (eg. "Hello}"). To attempt to recover, + * we reset the depth to zero and continue slurping into the string. 
+ */ + else if (BraceDepth < 0) + { + lexical_error ("unbalanced braces: too many }'s"); + BraceDepth = 0; + zzmore (); + } + + /* Otherwise, it's just any old right brace in a string -- keep eating */ + else + { + zzmore (); + } + report_state ("close_brace"); +} + + +void lparen_in_string (void) +{ + ParenDepth++; + zzmore (); +} + + +void rparen_in_string (void) +{ + ParenDepth--; + if (StringOpener == '(' && ParenDepth == 0) + { + end_string (')'); + } + else + { + zzmore (); + } +} + + +/* + * quote_in_string () + * + * Called when we see '"' in a string. Ends the string if the quote is at + * depth 0 and the string was started with a quote, otherwise instructs the + * lexer to continue munching happily along. (Also prints a warning, + * assuming that input is destined for processing by TeX and you really + * want either `` or '' rather than ".) + */ +void quote_in_string (void) +{ + if (StringOpener == '"' && BraceDepth == 0) + { + end_string ('"'); + } + else + { + boolean at_top = FALSE;; + + /* + * Note -- this warning assumes that strings are destined + * to be processed by TeX, so it should be optional. Hmmm. + */ + + if (StringOpener == '"' || StringOpener == '(') + at_top = (BraceDepth == 0); + else if (StringOpener == '{') + at_top = (BraceDepth == 1); + else + internal_error ("Illegal string opener \"%c\"", StringOpener); + + if (!QuoteWarned && at_top) + { + lexical_warning ("found \" at brace-depth zero in string " + "(TeX accents in BibTeX should be inside braces)"); + QuoteWarned = 1; + } + zzmore (); + } +} + + +/* + * check_runaway_string () + * + * Called from the lexer whenever we see a newline in a string. See + * bibtex.g for a detailed explanation; basically, this function + * looks for an entry start ("@name{") or new field ("name=") immediately + * after a newline (with possible whitespace). 
This is a heuristic + * check for runaway strings, under the assumption that text that looks + * like a new entry or new field won't actually occur inside a string + * very often. + */ +void check_runaway_string (void) +{ + int len; + int i; + + /* + * could these be made significantly more efficient by a 256-element + * lookup table instead of calling strchr()? + */ + static const char *alpha_chars = "abcdefghijklmnopqrstuvwxyz"; + static const char *name_chars = "abcdefghijklmnopqrstuvwxyz0123456789:+/'.-"; + + /* + * on entry: zzlextext contains the whole string, starting with { + * and with newlines/tabs converted to space; zzbegexpr points to + * a chunk of the string starting with newline (newlines and + * tabs have not yet been converted) + */ + +#if DEBUG > 1 + printf ("check_runaway_string(): zzline=%d\n", zzline); + printf ("zzlextext=>%s<\nzzbegexpr=>%s<\n", + zzlextext, zzbegexpr); +#endif + + + /* + * increment zzline to take the leading newline into account -- but + * first a sanity check to be sure that newline is there! + */ + + if (zzbegexpr[0] != '\n') + { + lexical_warning ("huh? something's wrong (buffer overflow?) 
near " + "offset %d (line %d)", zzendcol, zzline); + /* internal_error ("zzbegexpr (line %d, offset %d-%d, " + "text >%s<, expr >%s<)" + "should start with a newline", + zzline, zzbegcol, zzendcol, zzlextext, zzbegexpr); + */ + } + else + { + zzline++; + } + + /* standardize whitespace (convert all to space) */ + + len = strlen (zzbegexpr); + for (i = 0; i < len; i++) + { + if (isspace (zzbegexpr[i])) + zzbegexpr[i] = ' '; + } + + + if (!ApparentRunaway) /* haven't already warned about it */ + { + enum { none, entry, field, giveup } guess; + + i = 1; + guess = none; + while (i < len && zzbegexpr[i] == ' ') i++; + + if (zzbegexpr[i] == '@') + { + i++; + while (i < len && zzbegexpr[i] == ' ') i++; + guess = entry; + } + + if (strchr (alpha_chars, tolower (zzbegexpr[i])) != NULL) + { + while (i < len && strchr (name_chars, tolower (zzbegexpr[i])) != NULL) + i++; + while (i < len && zzbegexpr[i] == ' ') i++; + if (i == len) + { + guess = giveup; + } + else + { + if (guess == entry) + { + if (zzbegexpr[i] != '{' && zzbegexpr[i] != '(') + guess = giveup; + } + else /* assume it's a field */ + { + if (zzbegexpr[i] == '=') + guess = field; + else + guess = giveup; + } + } + } + else /* no name seen after WS or @ */ + { + guess = giveup; + } + + if (guess == none) + internal_error ("gee, I should have made a guess by now"); + + if (guess != giveup) + { + lexical_warning ("possible runaway string started at line %d", + StringStart); + ApparentRunaway = 1; + } + } + + zzmore(); +} + diff --git a/src/translators/btparse/lex_auxiliary.h b/src/translators/btparse/lex_auxiliary.h new file mode 100644 index 0000000..ebbf053 --- /dev/null +++ b/src/translators/btparse/lex_auxiliary.h @@ -0,0 +1,71 @@ +/* ------------------------------------------------------------------------ +@NAME : lex_auxiliary.h +@DESCRIPTION: Macros and function prototypes needed by the lexical scanner. 
+ Some of these are called from internal PCCTS code, and some + are explicitly called from the lexer actions in bibtex.g. +@CREATED : Summer 1996, Greg Ward +@MODIFIED : +@VERSION : $Id: lex_auxiliary.h,v 1.15 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ +#ifndef LEX_AUXILIARY_H +#define LEX_AUXILIARY_H + +#include "btparse.h" +#include "attrib.h" + +#define ZZCOPY_FUNCTION 0 + +#if ZZCOPY_FUNCTION +#define ZZCOPY zzcopy (&zznextpos, &lastpos, &zzbufovf) +#else +#define ZZCOPY \ + if (zznextpos >= lastpos) \ + { \ + lexer_overflow (&lastpos, &zznextpos); \ + } \ + *(zznextpos++) = zzchar; +#endif + + +/* Function prototypes: */ + +void lex_info (void); +void zzcr_attr (Attrib *a, int tok, char *txt); + +void alloc_lex_buffer (int size); +void free_lex_buffer (void); +void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos); +#if ZZCOPY_FUNCTION +void zzcopy (char **nextpos, char **lastpos, int *ovf_flag); +#endif + +void initialize_lexer_state (void); +bt_metatype entry_metatype (void); + +void newline (void); +void comment (void); +void at_sign (void); +void toplevel_junk (void); +void name (void); +void lbrace (void); +void rbrace (void); +void lparen (void); +void rparen (void); + +void start_string (char start_char); +void end_string (char end_char); +void open_brace (void); +void close_brace (void); +void lparen_in_string (void); +void rparen_in_string (void); +void quote_in_string (void); +void check_runaway_string (void); + +#endif /* ! 
defined LEX_AUXILIARY_H */ diff --git a/src/translators/btparse/macros.c b/src/translators/btparse/macros.c new file mode 100644 index 0000000..06db983 --- /dev/null +++ b/src/translators/btparse/macros.c @@ -0,0 +1,367 @@ +/* ------------------------------------------------------------------------ +@NAME : macros.c +@DESCRIPTION: Front-end to the standard PCCTS symbol table code (sym.c) + to abstract my "macro table". +@GLOBALS : +@CALLS : +@CREATED : 1997/01/12, Greg Ward +@MODIFIED : +@VERSION : $Id: macros.c,v 1.19 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include "sym.h" +#include "prototypes.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ +#include "bt_debug.h" + + +/* + * NUM_MACROS and STRING_SIZE define the size of the static data + * structure that holds the macro table. The defaults are to allocate + * 4096 bytes of string space that will be divided up amongst 547 + * macros. This should be fine for most applications, but if you have a + * big macro table you might need to change these and recompile (don't + * forget to rebuild and reinstall Text::BibTeX if you're using it!). + * You can set these as high as you like; just remember that a block of + * STRING_SIZE bytes will be allocated and not freed as long as you're + * using btparse. Also, NUM_MACROS defines the size of a hashtable, so + * it should probably be a prime a bit greater than a power of 2 -- or + * something like that. I'm not sure of the exact Knuthian + * specification. 
+ */ +#define NUM_MACROS 547 +#define STRING_SIZE 4096 + +Sym *AllMacros = NULL; /* `scope' so we can get back list */ + /* of all macros when done */ + + +GEN_PRIVATE_ERRFUNC (macro_warning, + (char * filename, int line, const char * fmt, ...), + BTERR_CONTENT, filename, line, NULL, -1, fmt) + + +/* ------------------------------------------------------------------------ +@NAME : init_macros() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Initializes the symbol table used to store macro values. +@GLOBALS : AllMacros +@CALLS : zzs_init(), zzs_scope() (sym.c) +@CALLERS : bt_initialize() (init.c) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +void +init_macros (void) +{ + zzs_init (NUM_MACROS, STRING_SIZE); + zzs_scope (&AllMacros); +} + + +/* ------------------------------------------------------------------------ +@NAME : done_macros() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees up all the macro values in the symbol table, and + then frees up the symbol table itself. +@GLOBALS : AllMacros +@CALLS : zzs_rmscope(), zzs_done() +@CALLERS : bt_cleanup() (init.c) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +void +done_macros (void) +{ + bt_delete_all_macros (); + zzs_done (); +} + + +static void +delete_macro_entry (Sym * sym) +{ + Sym * cur; + Sym * prev; + + /* + * Yechh! All this mucking about with the scope list really + * ought to be handled by the symbol table code. Must write + * my own someday. + */ + + /* Find this entry in the list of all macro table entries */ + cur = AllMacros; + prev = NULL; + while (cur != NULL && cur != sym) + { + prev = cur; + cur = cur->scope; + } + + if (cur == NULL) /* uh-oh -- wasn't found! 
*/ + { + internal_error ("macro table entry for \"%s\" not found in scope list", + sym->symbol); + } + + /* Now unlink from the "scope" list */ + if (prev == NULL) /* it's the head of the list */ + AllMacros = cur->scope; + else + prev->scope = cur->scope; + + /* Remove it from the macro hash table */ + zzs_del (sym); + + /* And finally, free up the entry's text and the entry itself */ + if (sym->text) free (sym->text); + free (sym); +} /* delete_macro_entry() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_add_macro_value() +@INPUT : assignment - AST node representing "macro = value" + options - string-processing options that were used to + process this string after parsing +@OUTPUT : +@RETURNS : +@DESCRIPTION: Adds a value to the symbol table used for macros. + + If the value was not already post-processed as a macro value + (expand macros, paste substrings, but don't collapse + whitespace), then this post-processing is done before adding + the macro text to the table. + + If the macro is already defined, a warning is printed and + the old text is overridden. +@GLOBALS : +@CALLS : bt_add_macro_text() + bt_postprocess_field() +@CALLERS : bt_postprocess_entry() (post_parse.c) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +void +bt_add_macro_value (AST *assignment, ushort options) +{ + AST * value; + char * macro; + char * text; + boolean free_text; + + if (assignment == NULL || assignment->down == NULL) return; + value = assignment->down; + + /* + * If the options that were used to process the macro's expansion text + * are anything other than BTO_MACRO, then we'll have to do it ourselves. 
+ */ + + if ((options & BTO_STRINGMASK) != BTO_MACRO) + { + text = bt_postprocess_field (assignment, BTO_MACRO, FALSE); + free_text = TRUE; /* because it's alloc'd by */ + /* bt_postprocess_field() */ + } + else + { + /* + * First a sanity check to make sure that the presumed post-processing + * had the desired effect. + */ + + if (value->nodetype != BTAST_STRING || value->right != NULL) + { + internal_error ("add_macro: macro value was not " + "correctly preprocessed"); + } + + text = assignment->down->text; + free_text = FALSE; + } + + macro = assignment->text; + bt_add_macro_text (macro, text, assignment->filename, assignment->line); + if (free_text && text != NULL) + free (text); + +} /* bt_add_macro_value() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_add_macro_text() +@INPUT : macro - the name of the macro to define + text - the macro text + filename, line - where the macro is defined; pass NULL + for filename if no file, 0 for line if no line number + (just used to generate warning message) +@OUTPUT : +@RETURNS : +@DESCRIPTION: Sets the text value for a macro. If the macro is already + defined, a warning is printed and the old value is overridden. 
+@GLOBALS : +@CALLS : zzs_get(), zzs_newadd() +@CALLERS : bt_add_macro_value() + (exported from library) +@CREATED : 1997/11/13, GPW (from code in bt_add_macro_value()) +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_add_macro_text (char * macro, char * text, char * filename, int line) +{ + Sym * sym; + Sym * new_rec; + +#if DEBUG == 1 + printf ("adding macro \"%s\" = \"%s\"\n", macro, text); +#elif DEBUG >= 2 + printf ("add_macro: macro = %p (%s)\n" + " text = %p (%s)\n", + macro, macro, text, text); +#endif + + if ((sym = zzs_get (macro))) + { + macro_warning (filename, line, + "overriding existing definition of macro \"%s\"", + macro); + delete_macro_entry (sym); + } + + new_rec = zzs_newadd (macro); + new_rec->text = (text != NULL) ? strdup (text) : NULL; + DBG_ACTION + (2, printf (" saved = %p (%s)\n", + new_rec->text, new_rec->text);) + +} /* bt_add_macro_text() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_delete_macro() +@INPUT : macro - name of macro to delete +@DESCRIPTION: Deletes a macro from the macro table. +@CALLS : zzs_get() +@CALLERS : +@CREATED : 1998/03/01, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_delete_macro (char * macro) +{ + Sym * sym; + + sym = zzs_get (macro); + if (! sym) return; + delete_macro_entry (sym); +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_delete_all_macros() +@DESCRIPTION: Deletes all macros from the macro table. +@CALLS : zzs_rmscore() +@CALLERS : +@CREATED : 1998/03/01, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_delete_all_macros (void) +{ + Sym *cur, *next; + + DBG_ACTION (2, printf ("bt_delete_all_macros():\n");) + + /* + * Use the current `scope' (same one for all macros) to get access to + * a linked list of all macros. 
Then traverse the list, free()'ing + * both the text (which was strdup()'d in add_macro(), below) and + * the records themselves (which are calloc()'d by zzs_new()). + */ + + cur = zzs_rmscope (&AllMacros); + while (cur != NULL) + { + DBG_ACTION + (2, printf (" freeing macro \"%s\" (%p=\"%s\") at %p\n", + cur->symbol, cur->text, cur->text, cur);) + + next = cur->scope; + if (cur->text != NULL) free (cur->text); + free (cur); + cur = next; + } +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_macro_length() +@INPUT : macro - the macro name +@OUTPUT : +@RETURNS : length of the macro's text, or zero if the macro is undefined +@DESCRIPTION: Returns length of a macro's text. +@GLOBALS : +@CALLS : zzs_get() +@CALLERS : bt_postprocess_value() + (exported from library) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +int +bt_macro_length (char *macro) +{ + Sym *sym; + + DBG_ACTION + (2, printf ("bt_macro_length: looking up \"%s\"\n", macro);) + + sym = zzs_get (macro); + if (sym) + return strlen (sym->text); + else + return 0; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_macro_text() +@INPUT : macro - the macro name + filename, line - where the macro was invoked; NULL for + `filename' and zero for `line' if not applicable +@OUTPUT : +@RETURNS : The text of the macro, or NULL if it's undefined. +@DESCRIPTION: Fetches a macros text; prints warning and returns NULL if + macro is undefined. 
+@CALLS : zzs_get() +@CALLERS : bt_postprocess_value() +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +char * +bt_macro_text (char * macro, char * filename, int line) +{ + Sym * sym; + + DBG_ACTION + (2, printf ("bt_macro_text: looking up \"%s\"\n", macro);) + + sym = zzs_get (macro); + if (!sym) + { + macro_warning (filename, line, "undefined macro \"%s\"", macro); + return NULL; + } + + return sym->text; +} diff --git a/src/translators/btparse/mode.h b/src/translators/btparse/mode.h new file mode 100644 index 0000000..25b36ce --- /dev/null +++ b/src/translators/btparse/mode.h @@ -0,0 +1,3 @@ +#define START 0 +#define LEX_ENTRY 1 +#define LEX_STRING 2 diff --git a/src/translators/btparse/modify.c b/src/translators/btparse/modify.c new file mode 100644 index 0000000..2d8d9c1 --- /dev/null +++ b/src/translators/btparse/modify.c @@ -0,0 +1,75 @@ +/* ------------------------------------------------------------------------ +@NAME : modify.c +@DESCRIPTION: Routines for modifying the AST for a single entry. +@GLOBALS : +@CALLS : +@CREATED : 1999/11/25, Greg Ward (based on code supplied by + St�phane Genaud <[email protected]>) +@MODIFIED : +@VERSION : $Id: modify.c,v 1.2 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include "btparse.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_text () +@INPUT : node + new_text +@OUTPUT : node->text +@RETURNS : +@DESCRIPTION: Replace the text member of an AST node with a new string. + The passed in string, 'new_text', is duplicated, so the + caller may free it without worry. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1999/11/25, GPW (from St�phane Genaud) +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_set_text (AST * node, char * new_text) +{ + free(node->text); + node->text = strdup (new_text); +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_entry_set_key () +@INPUT : entry + new_key +@OUTPUT : entry->down->text +@RETURNS : +@DESCRIPTION: Changes the key of a regular entry to 'new_key'. If 'entry' + is not a regular entry, or if it doesn't already have a child + node holding an entry key, bombs via 'usage_error()'. + Otherwise a duplicate of 'new_key' is copied into the entry + AST (so the caller can free that string without worry). 
+@CALLS : bt_set_text () +@CREATED : 1999/11/25, GPW (from St�phane Genaud) +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_entry_set_key (AST * entry, char * new_key) +{ + if (entry->metatype == BTE_REGULAR && + entry->down && entry->down->nodetype == BTAST_KEY) + { + bt_set_text (entry->down, new_key); + } + else + { + usage_error ("can't set entry key -- not a regular entry, " + "or doesn't have a key already"); + } +} diff --git a/src/translators/btparse/my_alloca.h b/src/translators/btparse/my_alloca.h new file mode 100644 index 0000000..0466157 --- /dev/null +++ b/src/translators/btparse/my_alloca.h @@ -0,0 +1,35 @@ +/* ------------------------------------------------------------------------ +@NAME : my_alloca.h +@DESCRIPTION: All-out assault at making alloca() available on any Unix + platform. Stolen from the GNU Autoconf manual. +@CREATED : 1997/10/30, Greg Ward +@VERSION : $Id: my_alloca.h,v 1.1 1997/10/31 03:56:17 greg Rel $ +@COPYRIGHT : This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +#ifndef MY_ALLOCA_H +#define MY_ALLOCA_H + +#ifdef __GNUC__ +# ifndef alloca +# define alloca __builtin_alloca +# endif +#else +# if HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX +# pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif + +#endif /* MY_ALLOCA_H */ diff --git a/src/translators/btparse/names.c b/src/translators/btparse/names.c new file mode 100644 index 0000000..11c4bfd --- /dev/null +++ b/src/translators/btparse/names.c @@ -0,0 +1,915 @@ +/* ------------------------------------------------------------------------ +@NAME : names.c +@DESCRIPTION: Functions for dealing with BibTeX names and lists of names: + bt_split_list + bt_split_name +@GLOBALS : +@CALLS : +@CREATED : 1997/05/05, Greg Ward (as string_util.c) +@MODIFIED : 1997/05/14-05/16, GW: added all the code to split individual + names, renamed file to names.c +@VERSION : $Id: names.c,v 1.23 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "btparse.h" +#include "prototypes.h" +#include "error.h" +#include "my_alloca.h" +/*#include "my_dmalloc.h"*/ +#include "bt_debug.h" + + +#define MAX_COMMAS 2 + +#define update_depth(s,offs,depth) \ +switch (s[offs]) \ +{ \ + case '{': depth++; break; \ + case '}': depth--; break; \ +} + +/* + * `name_loc' specifies where a name is found -- used for generating + * useful warning messages. `line' and `name_num' are both 1-based. + */ +typedef struct +{ + char * filename; + int line; + int name_num; +} name_loc; + + +GEN_PRIVATE_ERRFUNC (name_warning, + (name_loc * loc, const char * fmt, ...), + BTERR_CONTENT, loc->filename, loc->line, + "name", loc->name_num, fmt) + + +/* ------------------------------------------------------------------------ +@NAME : bt_split_list() +@INPUT : string - string to split up; whitespace must be collapsed + eg. by bt_postprocess_string() + delim - delimiter to use; must be lowercase and should be + free of whitespace (code requires that delimiters + in string be surrounded by whitespace) + filename - source of string (for warning messages) + line - 1-based line number into file (for warning messages) + description - what substrings are (eg. "name") (for warning + messages); if NULL will use "substring" +@OUTPUT : substrings (*substrings is allocated by bt_split_list() for you) +@RETURNS : number of substrings found +@DESCRIPTION: Splits a string using a fixed delimiter, in the BibTeX way: + * delimiters at beginning or end of string are ignored + * delimiters in string must be surrounded by whitespace + * case insensitive + * delimiters at non-zero brace depth are ignored + + The list of substrings is returned as *substrings, which + is an array of pointers into a duplicate of string. 
This + duplicate copy has been scribbled on such that there is + a nul byte at the end of every substring. You should + call bt_free_list() to free both the duplicate copy + of string and *substrings itself. Do *not* walk over + the array free()'ing the substrings yourself, as this is + invalid -- they were not malloc()'d! +@GLOBALS : +@CALLS : +@CALLERS : anyone (exported by library) +@CREATED : 1997/05/05, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +bt_stringlist * +bt_split_list (char * string, + char * delim, + char * filename, + int line, + char * description) +{ + int depth; /* brace depth */ + int i, j; /* offset into string and delim */ + int inword; /* flag telling if prev. char == ws */ + int string_len; + int delim_len; + int maxdiv; /* upper limit on no. of divisions */ + int maxoffs; /* max offset of delim in string */ + int numdiv; /* number of divisions */ + int * start; /* start of each division */ + int * stop; /* stop of each division */ + bt_stringlist * + list; /* structure to return */ + + if (string == NULL) + return NULL; + if (description == NULL) + description = "substring"; + + string_len = strlen (string); + delim_len = strlen (delim); + maxdiv = (string_len / delim_len) + 1; + maxoffs = string_len - delim_len + 1; + + /* + * This is a bit of a band-aid solution to the "split empty string" + * bug (formerly hit the internal_error() at the end of hte function). + * Still need a general "detect and fix unpreprocessed string" -- + * admittedly a different bug/misfeature. + */ + if (string_len == 0) + return NULL; + + start = (int *) alloca (maxdiv * sizeof (int)); + stop = (int *) alloca (maxdiv * sizeof (int)); + + list = (bt_stringlist *) malloc (sizeof (bt_stringlist)); + + depth = 0; + i = j = 0; + inword = 1; /* so leading delim ignored */ + numdiv = 0; + start[0] = 0; /* first substring @ start of string */ + + while (i < maxoffs) + { + /* does current char. 
in string match current char. in delim? */ + if (depth == 0 && !inword && tolower (string[i]) == delim[j]) + { + j++; i++; + + /* have we found an entire delim, followed by a space? */ + if (j == delim_len && string[i] == ' ') + { + + stop[numdiv] = i - delim_len - 1; + start[++numdiv] = ++i; + j = 0; + +#if DEBUG + printf ("found complete delim; i == %d, numdiv == %d: " + "stop[%d] == %d, start[%d] == %d\n", + i, numdiv, + numdiv-1, stop[numdiv-1], + numdiv, start[numdiv]); +#endif + } + } + + /* no match between string and delim, at non-zero depth, or in a word */ + else + { + update_depth (string, i, depth); + inword = (i < string_len) && (string[i] != ' '); + i++; + j = 0; + } + } + + stop[numdiv] = string_len; /* last substring ends just past eos */ + list->num_items = numdiv+1; + + + /* + * OK, now we know how many divisions there are and where they are -- + * so let's split that string up for real! + * + * list->items will be an array of pointers into a duplicate of + * `string'; we duplicate `string' so we can safely scribble on it and + * free() it later (in bt_free_list()). + */ + + list->items = (char **) malloc (list->num_items * sizeof (char *)); + list->string = strdup (string); + + for (i = 0; i < list->num_items; i++) + { + /* + * Possible cases: + * - stop < start is for empty elements, e.g. "and and" seen in + * input. (`start' for empty element will be the 'a' of the + * second 'and', and its stop will be the ' ' *before* the + * second 'and'.) + * - stop > start is for anything else between two and's (the usual) + * - stop == start should never happen if the loop above is correct + */ + + if (stop[i] > start[i]) /* the usual case */ + { + list->string[stop[i]] = 0; + list->items[i] = list->string+start[i]; + } + else if (stop[i] < start[i]) /* empty element */ + { + list->items[i] = NULL; + general_error (BTERR_CONTENT, filename, line, + description, i+1, "empty %s", description); + } + else /* should not happen! 
*/ + { + internal_error ("stop == start for substring %d", i); + } + } + + return list; +/* return num_substrings; */ + +} /* bt_split_list () */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_list() +@INPUT : list +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees the list of strings created by bt_split_list(). +@GLOBALS : +@CALLS : +@CALLERS : anyone (exported by library) +@CREATED : 1997/05/06, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_free_list (bt_stringlist *list) +{ + if (list && list->string) free (list->string); + if (list && list->items) free (list->items); + if (list) free (list); +} + + + +/* ---------------------------------------------------------------------- + * Stuff for splitting up a single name + */ + + +/* ------------------------------------------------------------------------ +@NAME : find_commas +@INPUT : name - string to search for commas + max_commas - maximum number of commas to allow (if more than + this number are seen, a warning is printed and + the excess commas are removed) +@OUTPUT : +@RETURNS : number of commas found +@DESCRIPTION: Counts and records positions of commas at brace-depth 0. + Modifies string in-place to remove whitespace around commas, + excess commas, and any trailing commas; warns on excess or + trailing commas. Excess commas are removed by replacing them + with space and calling bt_postprocess_string() to collapse + whitespace a second time; trailing commas are simply replaced + with (char) 0 to truncate the string. + + Assumes whitespace has been collapsed (ie. no space at + beginning or end of string, and all internal strings of + whitespace reduced to exactly one space). 
+@GLOBALS : +@CALLS : name_warning() (if too many commas, or commas at end) +@CALLERS : bt_split_name() +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +find_commas (name_loc * loc, char *name, int max_commas) +{ + int i, j; + int depth; + int num_commas; + int len; + boolean at_comma; + boolean warned; + + i = j = 0; + depth = 0; + num_commas = 0; + len = strlen (name); + warned = 0; + + /* First pass to check for and blank out excess commas */ + + for (i = 0; i < len; i++) + { + if (depth == 0 && name[i] == ',') + { + num_commas++; + if (num_commas > max_commas) + { + if (! warned) + { + name_warning (loc, "too many commas in name (removing extras)"); + warned = TRUE; + } + name[i] = ' '; + } + } + } + + /* + * If we blanked out a comma, better re-collapse whitespace. (This is + * a bit of a cop-out -- I could probably adjust i and j appropriately + * in the above loop to do the collapsing for me, but my brain + * hurt when I tried to think it through. Some other time, perhaps. 
+ */ + + if (warned) + bt_postprocess_string (name, BTO_COLLAPSE); + + /* Now the real comma-finding loop (only if necessary) */ + + if (num_commas == 0) + return 0; + + num_commas = 0; + i = 0; + while (i < len) + { + at_comma = (depth == 0 && name[i] == ','); + if (at_comma) + { + while (j > 0 && name[j-1] == ' ') j--; + num_commas++; + } + + update_depth (name, i, depth); + if (i != j) + name[j] = name[i]; + + i++; j++; + if (at_comma) + { + while (i < len && name[i] == ' ') i++; + } + } /* while i */ + + if (i != j) name[j] = (char) 0; + j--; + + if (name[j] == ',') + { + name_warning (loc, "comma(s) at end of name (removing)"); + while (name[j] == ',') + { + name[j--] = (char) 0; + num_commas--; + } + } + + return num_commas; + +} /* find_commas() */ + + +/* ------------------------------------------------------------------------ +@NAME : find_tokens +@INPUT : name - string to tokenize (should be a private copy + that we're free to clobber and mangle) +@OUTPUT : comma_token- number of token immediately preceding each comma + (caller must allocate with at least one element + per comma in `name') +@RETURNS : newly-allocated bt_stringlist structure +@DESCRIPTION: Finds tokens in a string; delimiter is space or comma at + brace-depth zero. Assumes whitespace has been collapsed + and find_commas has been run on the string to remove + whitespace around commas and any trailing commas. + + The bt_stringlist structure returned can (and should) be + freed with bt_free_list(). 
+@GLOBALS : +@CALLS : +@CALLERS : bt_split_name() +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static bt_stringlist * +find_tokens (char * name, + int * comma_token) +{ + int i; /* index into name */ + int num_tok; + int in_boundary; /* previous char was ' ' or ',' */ + int cur_comma; /* index into comma_token */ + int len; + int depth; + bt_stringlist * + tokens; + + i = 0; + in_boundary = 1; /* so first char will start a token */ + cur_comma = 0; + len = strlen (name); + depth = 0; + + tokens = (bt_stringlist *) malloc (sizeof (bt_stringlist)); + /* tokens->string = name ? strdup (name) : NULL; */ + tokens->string = name; + num_tok = 0; + tokens->items = NULL; + + if (len == 0) /* empty string? */ + return tokens; /* return empty token list */ + + tokens->items = (char **) malloc (sizeof (char *) * len); + + while (i < len) + { + if (depth == 0 && in_boundary) /* at start of a new token */ + { + tokens->items[num_tok++] = name+i; + } + + if (depth == 0 && (name[i] == ' ' || name[i] == ',')) + { + /* if we're at a comma, record the token preceding the comma */ + + if (name[i] == ',') + { + comma_token[cur_comma++] = num_tok-1; + } + + /* + * if already in a boundary zone, we have an empty token + * (caused by multiple consecutive commas) + */ + if (in_boundary) + { + tokens->items[num_tok-1] = NULL; + } + num_tok--; + + /* in any case, mark the end of one token and prepare for the + * start of the next + */ + name[i] = (char) 0; + in_boundary = 1; + } + else + { + in_boundary = 0; /* inside a token */ + } + + update_depth (name, i, depth); + i++; + + } /* while i */ + + tokens->num_items = num_tok; + return tokens; + +} /* find_tokens() */ + + +/* ------------------------------------------------------------------------ +@NAME : find_lc_tokens() +@INPUT : tokens +@OUTPUT : first_lc + last_lc +@RETURNS : +@DESCRIPTION: Finds the first contiguous string of lowercase tokens in + `name'. 
The string must already be tokenized by + find_tokens(), and the input args num_tok, tok_start, and + tok_stop are the return value and the two same-named output + arguments from find_tokens(). +@GLOBALS : +@CALLS : +@CALLERS : bt_split_name() +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +find_lc_tokens (bt_stringlist * tokens, + int * first_lc, + int * last_lc) +{ + int i; /* iterate over token list this time */ + int in_lc_sequence; /* in contig. sequence of lc tokens? */ + + *first_lc = *last_lc = -1; /* haven't found either yet */ + in_lc_sequence = 0; + + i = 0; + while (i < tokens->num_items) + { + if (*first_lc == -1 && islower (tokens->items[i][0])) + { + *first_lc = i; + + i++; + while (i < tokens->num_items && islower (tokens->items[i][0])) + i++; + + *last_lc = i-1; + } + else + { + i++; + } + } +} /* find_lc_tokens() */ + + +/* ------------------------------------------------------------------------ +@NAME : resolve_token_range() +@INPUT : tokens - structure containing the token list + tok_range - two-element array with start and stop token number +@OUTPUT : *part - set to point to first token in range, or NULL + if empty range + *num_tok - number of tokens in the range +@RETURNS : +@DESCRIPTION: Given a list of tokens and a range of token numbers (as a + two-element array, tok_range), computes the number of tokens + in the range. If this is >= 0, sets *part to point + to the first token in the range; otherwise, sets *part + to NULL. 
+@CALLERS : +@CREATED : May 1997, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +resolve_token_range (bt_stringlist *tokens, + int * tok_range, + char *** part, + int * num_tok) +{ + *num_tok = (tok_range[1] - tok_range[0]) + 1; + if (*num_tok <= 0) + { + *num_tok = 0; + *part = NULL; + } + else + { + *part = tokens->items + tok_range[0]; + } +} /* resolve_token_range() */ + + +/* ------------------------------------------------------------------------ +@NAME : split_simple_name() +@INPUT : name + first_lc + last_lc +@OUTPUT : name +@RETURNS : +@DESCRIPTION: Splits up a name (represented as a string divided into + non-overlapping, whitespace-separated tokens) according + to the BibTeX rules for names without commas. Specifically: + * tokens up to (but not including) the first lowercase + token, or the last token of the string if there + are no lowercase tokens, become the `first' part + * the earliest contiguous sequence of lowercase tokens, + up to (but not including) the last token of the string, + becomes the `von' part + * the tokens following the `von' part, or the last + single token if there is no `von' part, become + the `last' part + * there is no `jr' part +@GLOBALS : +@CALLS : name_warning() (if last lc token taken as lastname) + resolve_token_range() +@CALLERS : bt_split_name() +@CREATED : 1997/05/15, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +split_simple_name (name_loc * loc, + bt_name * name, + int first_lc, + int last_lc) +{ + int first_t[2], von_t[2], last_t[2]; + int end; + + end = name->tokens->num_items-1; /* token number of last token */ + + if (first_lc > -1) /* any lowercase tokens at all? 
*/ + { + first_t[0] = 0; /* first name goes from beginning */ + first_t[1] = first_lc-1; /* to just before first lc token */ + + if (last_lc == end) /* sequence of lowercase tokens */ + { /* goes all the way to end of string */ + last_lc--; /* -- roll it back by one so we */ + /* still have a lastname */ +#ifdef WARN_LC_LASTNAME + /* + * disable this warning for now because "others" is used fairly + * often as a name in BibTeX databases -- oops! + */ + name_warning (loc, + "no capitalized token at end of name; " + "using \"%s\" as lastname", + name->tokens->items[end]); +#else +# ifndef ALLOW_WARNINGS + loc = NULL; /* avoid "unused parameter" warning */ +# endif +#endif + } + + von_t[0] = first_lc; /* `von' part covers sequence of */ + von_t[1] = last_lc; /* lowercase tokens */ + last_t[0] = last_lc+1; /* lastname from after `von' to end */ + last_t[1] = end; /* of string */ + } + else /* no lowercase tokens */ + { + von_t[0] = 0; /* empty `von' part */ + von_t[1] = -1; + first_t[0] = 0; /* `first' goes from first to second */ + first_t[1] = end-1; /* last token */ + last_t[0] = last_t[1] = end; /* and `last' is just the last token */ + } + + resolve_token_range (name->tokens, first_t, + name->parts+BTN_FIRST, name->part_len+BTN_FIRST); + resolve_token_range (name->tokens, von_t, + name->parts+BTN_VON, name->part_len+BTN_VON); + resolve_token_range (name->tokens, last_t, + name->parts+BTN_LAST, name->part_len+BTN_LAST); + name->parts[BTN_JR] = NULL; /* no jr part possible */ + name->part_len[BTN_JR] = 0; + +} /* split_simple_name() */ + + +/* ------------------------------------------------------------------------ +@NAME : split_general_name() +@INPUT : name + num_commas + comma_token + first_lc + last_lc +@OUTPUT : name +@RETURNS : +@DESCRIPTION: Splits a name according to the BibTeX rules for names + with 1 or 2 commas (> 2 commas is handled elsewhere, + namely by bt_split_name() calling find_commas() with + max_commas == 2). 
Specifically: + * an initial string of lowercase tokens, up to (but not + including) the token before the first comma, becomes + the `von' part + * tokens from immediately after the `von' part, + or from the beginning of the string if no `von', + up to the first comma become the `last' part + + if one comma: + * all tokens following the sole comma become the + `first' part + + if two commas: + * tokens between the two commas become the `jr' part + * all tokens following the second comma become the + `first' part +@GLOBALS : +@CALLS : name_warning() (if last lc token taken as lastname) + resolve_token_range() +@CALLERS : bt_split_name() +@CREATED : 1997/05/15, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +split_general_name (name_loc * loc, + bt_name * name, + int num_commas, + int * comma_token, + int first_lc, + int last_lc) +{ + int first_t[2], von_t[2], last_t[2], jr_t[2]; + int end; + + end = name->tokens->num_items-1; /* last token number */ + + if (first_lc == 0) /* we have an initial string of */ + { /* lowercase tokens */ + if (last_lc == comma_token[0]) /* lc string ends at first comma */ + { + name_warning (loc, "no capitalized tokens before first comma"); + last_lc--; + } + + von_t[0] = first_lc; /* `von' covers the sequence of */ + von_t[1] = last_lc; /* lowercase tokens */ + } + else /* no lowercase tokens at start */ + { + von_t[0] = 0; /* empty `von' part */ + von_t[1] = -1; + } + + last_t[0] = von_t[1] + 1; /* start right after end of `von' */ + last_t[1] = comma_token[0]; /* and end at first comma */ + + if (num_commas == 1) + { + first_t[0] = comma_token[0]+1; /* start right after comma */ + first_t[1] = end; /* stop at end of string */ + jr_t[0] = 0; /* empty `jr' part */ + jr_t[1] = -1; + } + else /* more than 1 comma */ + { + jr_t[0] = comma_token[0]+1; /* start after first comma */ + jr_t[1] = comma_token[1]; /* stop at second comma */ + first_t[0] = comma_token[1]+1; /* 
start after second comma */ + first_t[1] = end; /* and go to end */ + } + + resolve_token_range (name->tokens, first_t, + name->parts+BTN_FIRST, name->part_len+BTN_FIRST); + resolve_token_range (name->tokens, von_t, + name->parts+BTN_VON, name->part_len+BTN_VON); + resolve_token_range (name->tokens, last_t, + name->parts+BTN_LAST, name->part_len+BTN_LAST); + resolve_token_range (name->tokens, jr_t, + name->parts+BTN_JR, name->part_len+BTN_JR); + +} /* split_general_name() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_split_name() +@INPUT : name + filename + line + name_num +@OUTPUT : +@RETURNS : newly-allocated bt_name structure containing the four + parts as token-lists +@DESCRIPTION: Splits a name according to the BibTeX rules. There are + actually two sets of rules: one for names with no commas, + and one for names with 1 or 2 commas. (If a name has + more than 2 commas, the extras are removed and it's treated + as though it had just the first 2.) + + See split_simple_name() for the no-comma rules, and + split_general_name() for the 1-or-2-commas rules. + + The bt_name structure returned can (and should) be freed + with bt_free_name() when you no longer need it. +@GLOBALS : +@CALLS : +@CALLERS : anyone (exported by library) +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +@COMMENTS : The name-splitting code all implicitly assumes that the + string being split has been post-processed to collapse + whitespace in the BibTeX way. This means that it tends to + dump core on such things as leading whitespace, or more than + one space in a row inside the string. This could probably be + alleviated with a call to bt_postprocess_string(), possibly + preceded by a check for any of those occurences. Before + doing that, though, I want to examine the code carefully to + determine just what assumptions it makes -- so I can + check/correct for all of them. 
+-------------------------------------------------------------------------- */ +bt_name * +bt_split_name (char * name, + char * filename, + int line, + int name_num) +{ + name_loc loc; + bt_stringlist * + tokens; + int comma_token[MAX_COMMAS]; + int len; + int num_commas; + int first_lc, last_lc; + bt_name * split_name; + int i; + + DBG_ACTION (1, printf ("bt_split_name(): name=%p (%s)\n", name, name)) + + split_name = (bt_name *) malloc (sizeof (bt_name)); + if (name == NULL) + { + len = 0; + } + else + { + name = strdup (name); /* private copy that we may clobber */ + len = strlen (name); + } + + DBG_ACTION (1, printf ("bt_split_name(): split_name=%p\n", split_name)) + + if (len == 0) /* non-existent or empty string? */ + { + split_name->tokens = NULL; + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + split_name->parts[i] = NULL; + split_name->part_len[i] = 0; + } + return split_name; + } + + loc.filename = filename; /* so called functions can generate */ + loc.line = line; /* decent warning messages */ + loc.name_num = name_num; + + num_commas = find_commas (&loc, name, MAX_COMMAS); + assert (num_commas <= MAX_COMMAS); + + DBG_ACTION (1, printf ("found %d commas: ", num_commas)) + + tokens = find_tokens (name, comma_token); + +#if DEBUG + printf ("found %d tokens:\n", tokens->num_items); + for (i = 0; i < tokens->num_items; i++) + { + printf (" %d: ", i); + + if (tokens->items[i]) /* non-empty token? */ + { + printf (">%s<\n", tokens->items[i]); + } + else + { + printf ("(empty)\n"); + } + } +#endif + +#if DEBUG + printf ("comma tokens: "); + for (i = 0; i < num_commas; i++) + printf ("%d ", comma_token[i]); + printf ("\n"); +#endif + + find_lc_tokens (tokens, &first_lc, &last_lc); +#if DEBUG + printf ("(first,last) lc tokens = (%d,%d)\n", first_lc, last_lc); +#endif + + if (strlen (name) == 0) /* name now empty? 
*/ + { + split_name->tokens = NULL; + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + split_name->parts[i] = NULL; + split_name->part_len[i] = 0; + } + } + else + { + split_name->tokens = tokens; + if (num_commas == 0) /* no commas -- "simple" format */ + { + split_simple_name (&loc, split_name, + first_lc, last_lc); + } + else + { + split_general_name (&loc, split_name, + num_commas, comma_token, + first_lc, last_lc); + } + } + +#if DEBUG + printf ("bt_split_name(): returning structure %p\n", split_name); +#endif + return split_name; +} /* bt_split_name() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_name() +@INPUT : name +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees up any memory allocated for a bt_name structure + (namely, the `tokens' field [a bt_stringlist structure, + this freed with bt_free_list()] and the structure itself.) +@CALLS : bt_free_list() +@CALLERS : anyone (exported) +@CREATED : 1997/11/14, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_free_name (bt_name * name) +{ + DBG_ACTION (2, printf ("bt_free_name(): freeing name %p " + "(%d tokens, string=%p (%s), last[0]=%s)\n", + name, + name->tokens->num_items, + name->tokens->string, + name->tokens->string, + name->parts[BTN_LAST][0])); + bt_free_list (name->tokens); + free (name); + DBG_ACTION (2, printf ("bt_free_name(): done, everything freed\n")); +} diff --git a/src/translators/btparse/parse_auxiliary.c b/src/translators/btparse/parse_auxiliary.c new file mode 100644 index 0000000..f509741 --- /dev/null +++ b/src/translators/btparse/parse_auxiliary.c @@ -0,0 +1,336 @@ +/* ------------------------------------------------------------------------ +@NAME : parse_auxiliary.c +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Anything needed by the parser that's too hairy to go in the + grammar itself. Currently, just stuff needed for generating + syntax errors. 
(See error.c for how they're actually + printed.) +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1996/08/07, Greg Ward +@MODIFIED : +@VERSION : $Id: parse_auxiliary.c,v 1.20 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include "stdpccts.h" +#include "error.h" +#include "lex_auxiliary.h" +#include "parse_auxiliary.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* from input.c */ + +GEN_PRIVATE_ERRFUNC (syntax_error, (char * fmt, ...), + BTERR_SYNTAX, InputFilename, zzline, NULL, -1, fmt) + + +/* this is stolen from PCCTS' err.h */ +static SetWordType bitmask[] = +{ + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080 +}; + +static struct +{ + int token; + const char *new_name; +} new_tokens[] = +{ + { AT, "\"@\"" }, + { NAME, "name (entry type, key, field, or macro name)" }, + { LBRACE, "left brace (\"{\")" }, + { RBRACE, "right brace (\"}\")" }, + { ENTRY_OPEN, "start of entry (\"{\" or \"(\")" }, + { ENTRY_CLOSE,"end of entry (\"}\" or \")\")" }, + { EQUALS, "\"=\"" }, + { HASH, "\"#\"" }, + { COMMA, "\",\"" }, + { NUMBER, "number" }, + { STRING, "quoted string ({...} or \"...\")" } +}; + + +#ifdef CLEVER_TOKEN_STUFF +char **token_names; +#endif + + +void +fix_token_names (void) +{ + int i; + int num_replace; + +#ifdef CLEVER_TOKEN_STUFF /* clever, but it doesn't work... */ + /* arg! 
this doesn't work because I don't know how to find out the + * number of tokens + */ + + int num_tok; + + num_tok = (sizeof(zztokens) / sizeof(*zztokens)); + sizeof (zztokens); + sizeof (*zztokens); + token_names = (char **) malloc (sizeof (char *) * num_tok); + + for (i = 0; i < num_tok; i++) + { + token_names[i] = zztokens[i]; + } +#endif + + num_replace = (sizeof(new_tokens) / sizeof(*new_tokens)); + for (i = 0; i < num_replace; i++) + { + const char *new = new_tokens[i].new_name; + const char **old = zztokens + new_tokens[i].token; + + *old = new; + } +} + + +#ifdef USER_ZZSYN + +static void +append_token_set (char *msg, SetWordType *a) +{ + SetWordType *p = a; + SetWordType *endp = &(p[zzSET_SIZE]); + unsigned e = 0; + int tokens_printed = 0; + + do + { + SetWordType t = *p; + SetWordType *b = &(bitmask[0]); + do + { + if (t & *b) + { + strcat (msg, zztokens[e]); + tokens_printed++; + if (tokens_printed < zzset_deg (a) - 1) + strcat (msg, ", "); + else if (tokens_printed == zzset_deg (a) - 1) + strcat (msg, " or "); + } + e++; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + } while (++p < endp); +} + + +void +zzsyn(const char * text, + int tok, + char * egroup, + SetWordType * eset, + int etok, + int k, + const char * bad_text) +{ + static char msg [MAX_ERROR]; + int len; + +#ifndef ALLOW_WARNINGS + text = NULL; /* avoid "unused parameter" warning */ +#endif + + /* Initial message: give location of error */ + + msg[0] = (char) 0; /* make sure string is empty to start! */ + if (tok == zzEOF_TOKEN) + strcat (msg, "at end of input"); + else + sprintf (msg, "found \"%s\"", bad_text); + + len = strlen (msg); + + + /* Caller supplied neither a single token nor set of tokens expected... */ + + if (!etok && !eset) + { + syntax_error (msg); + return; + } + else + { + strcat (msg, ", "); + len += 2; + } + + + /* I'm not quite sure what this is all about, or where k would be != 1... 
*/ + + if (k != 1) + { + sprintf (msg+len, "; \"%s\" not", bad_text); + if (zzset_deg (eset) > 1) strcat (msg, " in"); + len = strlen (msg); + } + + + /* This is the code that usually gets run */ + + if (zzset_deg (eset) > 0) + { + if (zzset_deg (eset) == 1) + strcat (msg, "expected "); + else + strcat (msg, "expected one of: "); + + append_token_set (msg, eset); + } + else + { + sprintf (msg+len, "expected %s", zztokens[etok]); + if (etok == ENTRY_CLOSE) + { + strcat (msg, " (skipping to next \"@\")"); + initialize_lexer_state (); + } + } + + len = strlen (msg); + if (egroup && strlen (egroup) > 0) + sprintf (msg+len, " in %s", egroup); + + syntax_error (msg); + +} +#endif /* USER_ZZSYN */ + + +void +check_field_name (AST * field) +{ + char * name; + + if (! field || field->nodetype != BTAST_FIELD) + return; + + name = field->text; + if (strchr ("0123456789", name[0])) + syntax_error ("invalid field name \"%s\": cannot start with digit", + name); +} + + +#ifdef STACK_DUMP_CODE + +static void +show_ast_stack_elem (int num) +{ + extern const char *nodetype_names[]; /* nicked from bibtex_ast.c */ + /* bt_nodetype nodetype; + bt_metatype metatype; */ + AST *elem; + + elem = zzastStack[num]; + printf ("zzastStack[%3d] = ", num); + if (elem) + { + /* get_node_type (elem, &nodetype, &metatype); */ + if (elem->nodetype <= BTAST_MACRO) + { + printf ("{ %s: \"%s\" (line %d, char %d) }\n", + nodetype_names[elem->nodetype], + elem->text, elem->line, elem->offset); + } + else + { + printf ("bogus node (uninitialized?)\n"); + } + } + else + { + printf ("NULL\n"); + } +} + + +static void +show_ast_stack_top (char *label) +{ + if (label) + printf ("%s: ast stack top: ", label); + else + printf ("ast stack top: "); + show_ast_stack_elem (zzast_sp); +} + + +static void +dump_ast_stack (char *label) +{ + int i; + + if (label) + printf ("%s: complete ast stack:\n", label); + else + printf ("complete ast stack:\n"); + + for (i = zzast_sp; i < ZZAST_STACKSIZE; i++) + { + printf (" "); 
+ show_ast_stack_elem (i); + } +} + + +static void +show_attrib_stack_elem (int num) +{ + Attrib elem; + + elem = zzaStack[num]; + printf ("zzaStack[%3d] = ", num); + printf ("{ \"%s\" (token %d (%s), line %d, char %d) }\n", + elem.text, elem.token, zztokens[elem.token], + elem.line, elem.offset); +} + + +static void +show_attrib_stack_top (char *label) +{ + if (label) + printf ("%s: attrib stack top: ", label); + else + printf ("attrib stack top: "); + show_attrib_stack_elem (zzasp); +} + + +static void +dump_attrib_stack (char *label) +{ + int i; + + if (label) + printf ("%s: complete attrib stack:\n", label); + else + printf ("complete attrib stack:\n"); + + for (i = zzasp; i < ZZA_STACKSIZE; i++) + { + printf (" "); + show_attrib_stack_elem (i); + } +} + +#endif /* STACK_DUMP_CODE */ diff --git a/src/translators/btparse/parse_auxiliary.h b/src/translators/btparse/parse_auxiliary.h new file mode 100644 index 0000000..5500513 --- /dev/null +++ b/src/translators/btparse/parse_auxiliary.h @@ -0,0 +1,32 @@ +/* ------------------------------------------------------------------------ +@NAME : parse_auxiliary.h +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Prototype declarations for functions in parse_auxiliary.c +@GLOBALS : +@CALLS : +@CREATED : 1997/01/08, Greg Ward +@MODIFIED : +@VERSION : $Id: parse_auxiliary.h,v 1.5 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +#ifndef PARSE_AUXILIARY_H +#define PARSE_AUXILIARY_H + +#include "stdpccts.h" /* for SetWordType typedef */ + +void fix_token_names (void); +void zzsyn (const char *text, int tok, + char *egroup, SetWordType *eset, int etok, + int k, const char *bad_text); +void check_field_name (AST * field); + +#endif /* PARSE_AUXILIARY_H */ diff --git a/src/translators/btparse/postprocess.c b/src/translators/btparse/postprocess.c new file mode 100644 index 0000000..7f7bfd4 --- /dev/null +++ b/src/translators/btparse/postprocess.c @@ -0,0 +1,498 @@ +/* ------------------------------------------------------------------------ +@NAME : postprocess.c +@DESCRIPTION: Operations applied to the AST (or strings in it) after + parsing is complete. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/12, Greg Ward (from code in bibparse.c, lex_auxiliary.c) +@MODIFIED : +@VERSION : $Id: postprocess.c,v 1.25 2000/05/02 23:06:31 greg Exp $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "btparse.h" +#include "error.h" +#include "parse_auxiliary.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + +#define DEBUG 1 + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_string () +@INPUT : s + options +@OUTPUT : s (modified in place according to the flags) +@RETURNS : (void) +@DESCRIPTION: Make a pass over string s (which is modified in-place) to + optionally collapse whitespace according to BibTeX rules + (if the BTO_COLLAPSE bit in options is true). + + Rules for collapsing whitespace are: + * whitespace at beginning/end of string is deleted + * within the string, each whitespace sequence is replaced by + a single space + + Note that part of the work is done by the lexer proper, + namely conversion of tabs and newlines to spaces. +@GLOBALS : +@CALLS : +@CREATED : originally in lex_auxiliary.c; moved here 1997/01/12 +@MODIFIED : +@COMMENTS : this only collapses whitespace now -- rename it??? +-------------------------------------------------------------------------- */ +void +bt_postprocess_string (char * s, ushort options) +{ + boolean collapse_whitespace; + char *i, *j; + int len; + + if (s == NULL) return; /* quit if no string supplied */ + +#if DEBUG > 1 + printf ("bt_postprocess_string: looking at >%s<\n", s); +#endif + + /* Extract any relevant options (just one currently) to local flags. */ + collapse_whitespace = options & BTO_COLLAPSE; + + /* + * N.B. i and j will both point into s; j is always >= i, and + * we copy characters from j to i. Whitespace is collapsed/deleted + * by advancing j without advancing i. + */ + i = j = s; /* start both at beginning of string */ + + /* + * If we're supposed to collapse whitespace, then advance j to the + * first non-space character. 
+ */ + if (collapse_whitespace) + { + while (*j == ' ' && *j != (char) 0) + j++; + } + + while (*j != (char) 0) + { + /* + * If we're in a string of spaces (ie. current and previous char. + * are both space), and we're supposed to be collapsing whitespace, + * then skip until we hit a non-space character (or end of string). + */ + if (collapse_whitespace && *j == ' ' && *(j-1) == ' ') + { + while (*j == ' ') j++; /* skip spaces */ + if (*j == (char) 0) /* reached end of string? */ + break; + } + + /* Copy the current character from j down to i */ + *(i++) = *(j++); + } + *i = (char) 0; /* ensure string is terminated */ + + + /* + * And mop up whitespace (if any) at end of string -- note that if there + * was any whitespace there, it has already been collapsed to exactly + * one space. + */ + len = strlen (s); + if (len > 0 && collapse_whitespace && s[len-1] == ' ') + { + s[--len] = (char) 0; + } + +#if DEBUG > 1 + printf (" transformed to >%s<\n", s); +#endif + +} /* bt_postprocess_string */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_value() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Post-processes a series of strings (compound value), + frequently found as the value of a "field = value" or "macro + = value" assignment. The actions taken here are governed by + the bits in 'options', but there are two distinct modes of + operation: pasting or not. + + We paste strings if and only if the BTO_PASTE bit in options + is set and there are two or more simple values in the + compound value. In this case, the BTO_EXPAND bit must be set + (it would be very silly to paste together strings with + unexpanded macro names!), and we make two passes over the + data: one to postprocess individual strings and accumulate + the one big string, and a second to postprocess the big + string. 
In the first pass, the caller-supplied 'options' + variable is largely ignored; we will never collapse + whitespace in the individual strings. The caller's wishes + are fully respected when we make the final post-processing + pass over the concatenation of the individual strings, + though. + + If we're not pasting strings, then the character of the + individual simple values will be preserved; macros might not + be expanded (depending on the BTO_EXPAND bit), numbers will + stay numbers, and strings will be post-processed + independently according to the 'options' variable. (Beware + -- this means you might collapse whitespace in individual + sub-strings, which would be bad if you intend to concatenate + them later in the BibTeX sense.) + + The 'replace' parameter is used to govern whether the + existing strings in the AST should be replaced with their + post-processed versions. This can extend as far as + collapsing a series of simple values into a single BTAST_STRING + node, if we paste sub-strings together. If replace is FALSE, + the returned string is allocated here, and you must free() it + later. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/10, GPW +@MODIFIED : 1997/08/25, GPW: renamed from bt_postprocess_field(), and changed + to take the head of a list of simple values, + rather than the parent of that list +-------------------------------------------------------------------------- */ +char * +bt_postprocess_value (AST * value, ushort options, boolean replace) +{ + AST * simple_value; /* current simple value */ + boolean pasting; + ushort string_opts; /* what to do to individual strings */ + int tot_len; /* total length of pasted string */ + char * new_string; /* in case of string pasting */ + char * tmp_string; + boolean free_tmp; /* should we free() tmp_string? 
*/ + + if (value == NULL) return NULL; + if (value->nodetype != BTAST_STRING && + value->nodetype != BTAST_NUMBER && + value->nodetype != BTAST_MACRO) + { + usage_error ("bt_postprocess_value: invalid AST node (not a value)"); + } + + + /* + * We will paste strings iff the user wants us to, and there are at least + * two simple values in the list headed by 'value'. + */ + + pasting = (options & BTO_PASTE) && (value->right); + + /* + * If we're to concatenate (paste) sub-strings, we need to know the + * total length of them. So make a pass over all the sub-strings + * (simple values), adding up their lengths. + */ + + tot_len = 0; /* these are out here to keep */ + new_string = NULL; /* gcc -Wall happy */ + tmp_string = NULL; + + if (pasting) + { + simple_value = value; + while (simple_value) + { + switch (simple_value->nodetype) + { + case BTAST_MACRO: + tot_len += bt_macro_length (simple_value->text); + break; + case BTAST_STRING: + tot_len += (simple_value->text) + ? (strlen (simple_value->text)) : 0; + break; + case BTAST_NUMBER: + tot_len += (simple_value->text) + ? (strlen (simple_value->text)) : 0; + break; + default: + internal_error ("simple value has bad nodetype (%d)", + (int) simple_value->nodetype); + } + simple_value = simple_value->right; + } + + /* Now allocate the buffer in which we'll accumulate the whole string */ + + new_string = (char *) calloc (tot_len+1, sizeof (char)); + } + + + /* + * Before entering the main loop, figure out just what + * bt_postprocess_string() is supposed to do -- eg. if pasting strings, + * we should not (yet) collapse whitespace. (That'll be done on the + * final, concatenated string -- assuming the caller put BTO_COLLAPSE in + * the options bitmap.) 
+ */ + + if (pasting) + { + string_opts = options & ~BTO_COLLAPSE; /* turn off collapsing */ + } + else + { + string_opts = options; /* leave it alone */ + } + + /* + * Sanity check: if we continue blindly on, we might stupidly + * concatenate a macro name and a literal string. So check for that. + * Converting numbers is superficial, but requiring that it be done + * keeps people honest. + */ + + if (pasting && ! (options & (BTO_CONVERT|BTO_EXPAND))) + { + usage_error ("bt_postprocess_value(): " + "must convert numbers and expand macros " + "when pasting substrings"); + } + + /* + * Now the main loop to process each string, and possibly tack it onto + * new_string. + */ + + simple_value = value; + while (simple_value) + { + tmp_string = NULL; + free_tmp = FALSE; + + /* + * If this simple value is a macro and we're supposed to expand + * macros, then do so. We also have to post-process the string + * returned from the macro table, because they're stored there + * without whitespace collapsed; if we're supposed to be doing that + * to the current value (and we're not pasting), this is where it + * will get done. + */ + if (simple_value->nodetype == BTAST_MACRO && (options & BTO_EXPAND)) + { + tmp_string = bt_macro_text (simple_value->text, + simple_value->filename, + simple_value->line); + if (tmp_string != NULL) + { + tmp_string = strdup (tmp_string); + free_tmp = TRUE; + bt_postprocess_string (tmp_string, string_opts); + } + + if (replace) + { + simple_value->nodetype = BTAST_STRING; + if (simple_value->text) + free (simple_value->text); + simple_value->text = tmp_string; + free_tmp = FALSE; /* mustn't free, it's now in the AST */ + } + } + + /* + * If the current simple value is a literal string, then just + * post-process it. This will be done in-place if 'replace' is + * true, otherwise a copy of the string will be post-processed. 
+ */ + else if (simple_value->nodetype == BTAST_STRING && simple_value->text) + { + if (replace) + { + tmp_string = simple_value->text; + } + else + { + tmp_string = strdup (simple_value->text); + free_tmp = TRUE; + } + + bt_postprocess_string (tmp_string, string_opts); + } + + /* + * Finally, if the current simple value is a number, change it to a + * string (depending on options) and get its value. We generally + * treat strings as numbers as equivalent, except of course numbers + * aren't post-processed -- there can't be any whitespace in them! + * The BTO_CONVERT option is mainly a sop to my strong-typing + * tendencies. + */ + if (simple_value->nodetype == BTAST_NUMBER) + { + if (replace && (options & BTO_CONVERT)) + simple_value->nodetype = BTAST_STRING; + + if (simple_value->text) + { + if (replace) + tmp_string = simple_value->text; + else + { + tmp_string = strdup (simple_value->text); + free_tmp = TRUE; + } + } + } + + if (pasting) + { + if (tmp_string) + strcat (new_string, tmp_string); + if (free_tmp) + free (tmp_string); + } + else + { + /* + * N.B. if tmp_string is NULL (eg. from a single undefined macro) + * we make a strdup() of the empty string -- this is so we can + * safely free() the string returned from this function + * at some future point. + * + * This strdup() seems to cause a 1-byte memory leak in some + * circumstances. I s'pose I should look into that some rainy + * afternoon... + */ + + new_string = (tmp_string != NULL) ? tmp_string : strdup (""); + } + + simple_value = simple_value->right; + } + + if (pasting) + { + int len; + + len = strlen (new_string); + assert (len <= tot_len); /* hope we alloc'd enough! */ + + bt_postprocess_string (new_string, options); + + /* + * If replacing data in the AST, delete all but first child of + * `field', and replace text for first child with new_string. + */ + + if (replace) + { + assert (value->right != NULL); /* there has to be > 1 simple value! 
*/ + zzfree_ast (value->right); /* free from second simple value on */ + value->right = NULL; /* remind ourselves they're gone */ + if (value->text) /* free text of first simple value */ + free (value->text); + value->text = new_string; /* and replace it with concatenation */ + } + } + + return new_string; + +} /* bt_postprocess_value() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_field() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Postprocesses all the strings in a single "field = value" + assignment subtree. Just checks that 'field' does indeed + point to an BTAST_FIELD node (presumably the parent of a list + of simple values), downcases the field name, and calls + bt_postprocess_value() on the value. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/08/25, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +char * +bt_postprocess_field (AST * field, ushort options, boolean replace) +{ + if (field == NULL) return NULL; + if (field->nodetype != BTAST_FIELD) + usage_error ("bt_postprocess_field: invalid AST node (not a field)"); + + strlwr (field->text); /* downcase field name */ + return bt_postprocess_value (field->down, options, replace); + +} /* bt_postprocess_field() */ + + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_entry() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Postprocesses all the strings in an entry: collapse whitespace, + concatenate substrings, expands macros, and whatnot. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/10, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_postprocess_entry (AST * top, ushort options) +{ + AST *cur; + + if (top == NULL) return; /* not even an entry at all! 
*/ + if (top->nodetype != BTAST_ENTRY) + usage_error ("bt_postprocess_entry: " + "invalid node type (not entry root)"); + strlwr (top->text); /* downcase entry type */ + + if (top->down == NULL) return; /* no children at all */ + + cur = top->down; + if (cur->nodetype == BTAST_KEY) + cur = cur->right; + + switch (top->metatype) + { + case BTE_REGULAR: + case BTE_MACRODEF: + { + while (cur) + { + bt_postprocess_field (cur, options, TRUE); + if (top->metatype == BTE_MACRODEF && ! (options & BTO_NOSTORE)) + bt_add_macro_value (cur, options); + + cur = cur->right; + } + break; + } + + case BTE_COMMENT: + case BTE_PREAMBLE: + bt_postprocess_value (cur, options, TRUE); + break; + default: + internal_error ("bt_postprocess_entry: unknown entry metatype (%d)", + (int) top->metatype); + } + +} /* bt_postprocess_entry() */ diff --git a/src/translators/btparse/prototypes.h b/src/translators/btparse/prototypes.h new file mode 100644 index 0000000..88beada --- /dev/null +++ b/src/translators/btparse/prototypes.h @@ -0,0 +1,47 @@ +/* ------------------------------------------------------------------------ +@NAME : prototypes.h +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Prototype declarations for functions from various places. + Only functions that are private to the library (but shared + between files within the library) are declared here. + Functions that are "exported from" the library (ie. usable + by and expected to be used by library user) are declared in + btparse.h. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/12, Greg Ward +@MODIFIED : +@VERSION : $Id: prototypes.h,v 1.14 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +#ifndef PROTOTYPES_H +#define PROTOTYPES_H + +#include <stdio.h> +#include "btparse.h" /* for types */ + +/* util.c */ +#if !HAVE_STRLWR +char *strlwr (char *s); +#endif +#if !HAVE_STRUPR +char *strupr (char *s); +#endif + +/* macros.c */ +void init_macros (void); +void done_macros (void); + +/* bibtex_ast.c */ +void dump_ast (char *msg, AST *root); + +#endif /* PROTOTYPES_H */ diff --git a/src/translators/btparse/scan.c b/src/translators/btparse/scan.c new file mode 100644 index 0000000..b9899e4 --- /dev/null +++ b/src/translators/btparse/scan.c @@ -0,0 +1,615 @@ + +/* parser.dlg -- DLG Description of scanner + * + * Generated from: bibtex.g + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ + +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btconfig.h" +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#include "antlr.h" +#include "ast.h" +#include "tokens.h" +#include "dlgdef.h" +LOOKAHEAD +void zzerraction() +{ + (*zzerr)("invalid token"); + zzadvance(); + zzskip(); +} +/* + * D L G tables + * + * Generated from: parser.dlg + * + * 1989-1994 by Will Cohen, Terence Parr, and Hank Dietz + * Purdue University Electrical Engineering + * DLG Version 1.33 + */ + +#include "mode.h" + + + +static void act1() +{ + NLA = 1; + } + + +static void act2() +{ + NLA = AT; + at_sign (); + } + + +static void act3() +{ + NLA = 3; + newline (); + } + + +static void act4() +{ + NLA = COMMENT; + comment (); + } + + +static void act5() +{ + NLA = 5; + zzskip (); + } + + +static void act6() +{ + NLA = 6; + toplevel_junk (); + } + +static unsigned char shift0[257] = { + 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+ 4, 2, 5, 5, 4, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 4, 5, 5, 5, 5, 3, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5 +}; + + +static void act7() +{ + NLA = 1; + } + + +static void act8() +{ + NLA = 7; + newline (); + } + + +static void act9() +{ + NLA = COMMENT; + comment (); + } + + +static void act10() +{ + NLA = 8; + zzskip (); + } + + +static void act11() +{ + NLA = NUMBER; + } + + +static void act12() +{ + NLA = NAME; + name (); + } + + +static void act13() +{ + NLA = LBRACE; + lbrace (); + } + + +static void act14() +{ + NLA = RBRACE; + rbrace (); + } + + +static void act15() +{ + NLA = ENTRY_OPEN; + lparen (); + } + + +static void act16() +{ + NLA = ENTRY_CLOSE; + rparen (); + } + + +static void act17() +{ + NLA = EQUALS; + } + + +static void act18() +{ + NLA = HASH; + } + + +static void act19() +{ + NLA = COMMA; + } + + +static void act20() +{ + NLA = 18; + start_string ('"'); + } + +static unsigned char shift1[257] = { + 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 3, 1, 14, 14, 3, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 3, 5, 13, 11, 5, 2, 5, + 14, 8, 9, 5, 5, 12, 5, 5, 5, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 5, 5, 10, 5, 5, 14, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 14, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 6, 5, 7, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14 +}; + + +static void act21() +{ + NLA = 1; + } + + +static void act22() +{ + NLA = 19; + check_runaway_string (); + } + + +static void act23() +{ + NLA = 20; + zzreplchar (' '); zzmore (); + } + + +static void act24() +{ + NLA = 21; + open_brace (); + } + + +static void act25() +{ + NLA = 22; + close_brace (); + } + + +static void act26() +{ + NLA = 23; + lparen_in_string (); + } + + +static void act27() +{ + NLA = 24; + rparen_in_string (); + } + + +static void act28() +{ + NLA = STRING; + quote_in_string (); + } + + +static void act29() +{ + NLA = 26; + zzmore (); + } + +static unsigned char shift2[257] = { + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 1, 3, 3, 2, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 8, 3, 3, 3, 3, + 3, 6, 7, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 3, 5, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3 +}; + +#define DfaStates 38 +typedef unsigned char DfaState; + +static DfaState st0[7] = { + 1, 2, 3, 4, 5, 6, 38 +}; + +static DfaState st1[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st2[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st3[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st4[7] = { + 38, 7, 8, 9, 7, 9, 38 +}; + +static DfaState st5[7] = { + 38, 38, 38, 38, 5, 38, 38 +}; + +static DfaState st6[7] = { + 38, 38, 38, 6, 38, 6, 38 +}; + +static DfaState st7[7] = { + 38, 7, 8, 7, 7, 7, 38 +}; + +static DfaState st8[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st9[7] = { + 38, 7, 8, 9, 7, 9, 38 +}; + +static DfaState st10[16] = { + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 38, 38 +}; + +static DfaState st11[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st12[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st13[16] = { + 38, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 38 +}; + +static DfaState st14[16] = { + 38, 38, 38, 14, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st15[16] = { + 38, 38, 38, 38, 15, 16, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st16[16] = { + 38, 38, 38, 38, 16, 16, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st17[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st18[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st19[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st20[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st21[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st22[16] = { + 38, 38, 38, 38, 38, 
38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st23[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st24[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st25[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st26[16] = { + 38, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 38 +}; + +static DfaState st27[11] = { + 28, 29, 30, 31, 32, 33, 34, 35, 36, 31, + 38 +}; + +static DfaState st28[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st29[11] = { + 38, 38, 37, 37, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st30[11] = { + 38, 38, 31, 31, 38, 38, 38, 38, 38, 31, + 38 +}; + +static DfaState st31[11] = { + 38, 38, 31, 31, 38, 38, 38, 38, 38, 31, + 38 +}; + +static DfaState st32[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st33[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st34[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st35[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st36[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st37[11] = { + 38, 38, 37, 37, 38, 38, 38, 38, 38, 38, + 38 +}; + + +DfaState *dfa[38] = { + st0, + st1, + st2, + st3, + st4, + st5, + st6, + st7, + st8, + st9, + st10, + st11, + st12, + st13, + st14, + st15, + st16, + st17, + st18, + st19, + st20, + st21, + st22, + st23, + st24, + st25, + st26, + st27, + st28, + st29, + st30, + st31, + st32, + st33, + st34, + st35, + st36, + st37 +}; + + +DfaState accepts[39] = { + 0, 1, 2, 3, 6, 5, 6, 0, 4, 6, + 0, 7, 8, 0, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 9, 0, 0, 21, 22, + 23, 29, 24, 25, 26, 27, 28, 22, 0 +}; + +void (*actions[30])() = { + zzerraction, + act1, + act2, + act3, + act4, + act5, + act6, + act7, + act8, + act9, + 
act10, + act11, + act12, + act13, + act14, + act15, + act16, + act17, + act18, + act19, + act20, + act21, + act22, + act23, + act24, + act25, + act26, + act27, + act28, + act29 +}; + +static DfaState dfa_base[] = { + 0, + 10, + 27 +}; + +static unsigned char *b_class_no[] = { + shift0, + shift1, + shift2 +}; + + + +#define ZZSHIFT(c) (b_class_no[zzauto][1+c]) +#define MAX_MODE 3 +#include "dlgauto.h" diff --git a/src/translators/btparse/stdpccts.h b/src/translators/btparse/stdpccts.h new file mode 100644 index 0000000..e232634 --- /dev/null +++ b/src/translators/btparse/stdpccts.h @@ -0,0 +1,31 @@ +#ifndef STDPCCTS_H +#define STDPCCTS_H +/* + * stdpccts.h -- P C C T S I n c l u d e + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#define GENAST +#define zzSET_SIZE 4 +#include "antlr.h" +#include "ast.h" +#include "tokens.h" +#include "dlgdef.h" +#include "mode.h" +#endif diff --git a/src/translators/btparse/string_util.c b/src/translators/btparse/string_util.c new file mode 100644 index 0000000..3713608 --- /dev/null +++ b/src/translators/btparse/string_util.c @@ -0,0 +1,695 @@ +/* ------------------------------------------------------------------------ +@NAME : string_util.c +@DESCRIPTION: Various string-processing utility functions: + bt_purify_string() + bt_change_case() + + and their helpers: + foreign_letter() + purify_special_char() +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/10/19, Greg Ward +@MODIFIED : 1997/11/25, GPW: renamed to from purify.c to string_util.c + added bt_change_case() and friends +@VERSION : $Id: string_util.c,v 1.10 1999/10/28 22:50:28 
greg Rel $ +-------------------------------------------------------------------------- */ + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include "error.h" +#include "btparse.h" +#include "bt_debug.h" + + +/* + * These definitions should be fixed to be consistent with HTML + * entities, just for fun. And perhaps I should add entries for + * accented letters (at least those supported by TeX and HTML). + */ +typedef enum +{ + L_OTHER, /* not a "foreign" letter */ + L_OSLASH_L, /* Eastern European {\o} */ + L_OSLASH_U, + L_LSLASH_L, /* {\l} */ + L_LSLASH_U, + L_OELIG_L, /* Latin {\oe} ligature */ + L_OELIG_U, + L_AELIG_L, /* {\ae} ligature */ + L_AELIG_U, + L_SSHARP_L, /* German "sharp s" {\ss} */ + L_SSHARP_U, + L_ACIRCLE_L, /* Nordic {\aa} */ + L_ACIRCLE_U, + L_INODOT_L, /* undotted i: {\i} */ + L_JNODOT_L /* {\j} */ +} bt_letter; + + +static const char * uc_version[] = +{ + NULL, /* L_OTHER */ + "\\O", /* L_OSLASH_L */ + "\\O", /* L_OSLASH_U */ + "\\L", /* L_LSLASH_L */ + "\\L", /* L_LSLASH_U */ + "\\OE", /* L_OELIG_L */ + "\\OE", /* L_OELIG_U */ + "\\AE", /* L_AELIG_L */ + "\\AE", /* L_AELIG_U */ + "SS", /* L_SSHARP_L -- for LaTeX 2.09 */ + "\\SS", /* L_SSHARP_U */ + "\\AA", /* L_ACIRCLE_L */ + "\\AA", /* L_ACIRCLE_U */ + "I", /* L_INODOT_L */ + "J" /* L_JNODOT_L */ +}; + +static const char * lc_version[] = +{ + NULL, /* L_OTHER */ + "\\o", /* L_OSLASH_L */ + "\\o", /* L_OSLASH_U */ + "\\l", /* L_LSLASH_L */ + "\\l", /* L_LSLASH_U */ + "\\oe", /* L_OELIG_L */ + "\\oe", /* L_OELIG_U */ + "\\ae", /* L_AELIG_L */ + "\\ae", /* L_AELIG_U */ + "\\ss", /* L_SSHARP_L */ + "\\ss", /* L_SSHARP_U */ + "\\aa", /* L_ACIRCLE_L */ + "\\aa", /* L_ACIRCLE_U */ + "\\i", /* L_INODOT_L */ + "\\j" /* L_JNODOT_L */ +}; + + + +/* ------------------------------------------------------------------------ +@NAME : foreign_letter() +@INPUT : str + start + stop +@OUTPUT : letter +@RETURNS : TRUE if the string delimited by start and stop is a foreign + 
letter control sequence +@DESCRIPTION: Determines if a character sequence is one of (La)TeX's + "foreign letter" control sequences (l, o, ae, oe, aa, ss, plus + uppercase versions). If `letter' is non-NULL, returns which + letter was found in it (as a bt_letter value). +@CALLS : +@CALLERS : purify_special_char() +@CREATED : 1997/10/19, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static boolean +foreign_letter (char *str, int start, int stop, bt_letter * letter) +{ + char c1, c2; + bt_letter dummy; + + + /* + * This is written for speed, not flexibility -- adding new foreign + * letters would be trying and vexatious. + * + * N.B. my gold standard list of foreign letters is Kopka and Daly's + * *A Guide to LaTeX 2e*, section 2.5.6. + */ + + if (letter == NULL) /* so we can assign to *letter */ + letter = &dummy; /* without compunctions */ + *letter = L_OTHER; /* assume not a "foreign" letter */ + + c1 = str[start+0]; /* only two characters that we're */ + c2 = str[start+1]; /* interested in */ + + switch (stop - start) + { + case 1: /* one-character control sequences */ + switch (c1) /* (\o and \l) */ + { + case 'o': + *letter = L_OSLASH_L; return TRUE; + case 'O': + *letter = L_OSLASH_U; return TRUE; + case 'l': + *letter = L_LSLASH_L; return TRUE; + case 'L': + *letter = L_LSLASH_L; return TRUE; + case 'i': + *letter = L_INODOT_L; return TRUE; + case 'j': + *letter = L_JNODOT_L; return TRUE; + default: + return FALSE; + } + break; + case 2: /* two character control sequences */ + switch (c1) /* (\oe, \ae, \aa, and \ss) */ + { + case 'o': + if (c2 == 'e') { *letter = L_OELIG_L; return TRUE; } + case 'O': + if (c2 == 'E') { *letter = L_OELIG_U; return TRUE; } + + /* BibTeX 0.99 does not handle \aa and \AA -- but I do!*/ + case 'a': + if (c2 == 'e') + { *letter = L_AELIG_L; return TRUE; } + else if (c2 == 'a') + { *letter = L_ACIRCLE_L; return TRUE; } + else + return FALSE; + case 'A': + if (c2 == 'E') + { *letter = 
L_AELIG_U; return TRUE; } + else if (c2 == 'A') + { *letter = L_ACIRCLE_U; return TRUE; } + else + return FALSE; + + /* uppercase sharp-s -- new with LaTeX 2e (so far all I do + * is recognize it as a "foreign" letter) + */ + case 's': + if (c2 == 's') + { *letter = L_SSHARP_L; return TRUE; } + else + return FALSE; + case 'S': + if (c2 == 'S') + { *letter = L_SSHARP_U; return TRUE; } + else + return FALSE; + } + break; + default: + return FALSE; + } /* switch on length of control sequence */ + + internal_error ("foreign_letter(): should never reach end of function"); + return FALSE; /* to keep gcc -Wall happy */ + +} /* foreign_letter */ + + +/* ------------------------------------------------------------------------ +@NAME : purify_special_char() +@INPUT : *src, *dst - pointers into the input and output strings +@OUTPUT : *src - updated to point to the closing brace of the + special char + *dst - updated to point to the next available spot + for copying text to +@RETURNS : +@DESCRIPTION: "Purifies" a BibTeX special character. On input, *src should + point to the opening brace of a special character (ie. the + brace must be at depth 0 of the whole string, and the + character immediately following it must be a backslash). + *dst should point to the next spot to copy into the output + (purified) string. purify_special_char() will skip over the + opening brace and backslash; if the control sequence is one + of LaTeX's foreign letter sequences (as determined by + foreign_letter()), then it is simply copied to *dst. + Otherwise the control sequence is skipped. In either case, + text after the control sequence is either copied (alphabetic + characters) or skipped (anything else, including hyphens, + ties, and digits). 
+@CALLS : foreign_letter() +@CALLERS : bt_purify_string() +@CREATED : 1997/10/19, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +purify_special_char (char *str, int * src, int * dst) +{ + int depth; + int peek; + + assert (str[*src] == '{' && str[*src + 1] == '\\'); + depth = 1; + + *src += 2; /* jump to start of control sequence */ + peek = *src; /* scan to end of control sequence */ + while (isalpha (str[peek])) + peek++; + if (peek == *src) /* in case of single-char, non-alpha */ + peek++; /* control sequence (eg. {\'e}) */ + + if (foreign_letter (str, *src, peek, NULL)) + { + assert (peek - *src == 1 || peek - *src == 2); + str[(*dst)++] = str[(*src)++]; /* copy first char */ + if (*src < peek) /* copy second char, downcasing */ + str[(*dst)++] = tolower (str[(*src)++]); + } + else /* not a foreign letter -- skip */ + { /* the control sequence entirely */ + *src = peek; + } + + while (str[*src]) + { + switch (str[*src]) + { + case '{': + depth++; + (*src)++; + break; + case '}': + depth--; + if (depth == 0) return; /* done with special char */ + (*src)++; + break; + default: + if (isalpha (str[*src])) /* copy alphabetic chars */ + str[(*dst)++] = str[(*src)++]; + else /* skip everything else */ + (*src)++; + } + } + + /* + * If we get here, we have unbalanced braces -- the '}' case should + * always hit a depth == 0 point if braces are balanced. No warning, + * though, because a) BibTeX doesn't warn about purifying unbalanced + * strings, and b) we (should have) already warned about it in the + * lexer. + */ + +} /* purify_special_char() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_purify_string() +@INOUT : instr +@INPUT : options +@OUTPUT : +@RETURNS : instr - same as input string, but modified in place +@DESCRIPTION: "Purifies" a BibTeX string. 
This consists of copying + alphanumeric characters, converting hyphens and ties to + space, copying spaces, and skipping everything else. (Well, + almost -- special characters are handled specially, of + course. Basically, accented letters have the control + sequence skipped, while foreign letters have the control + sequence preserved in a reasonable manner. See + purify_special_char() for details.) +@CALLS : purify_special_char() +@CALLERS : +@CREATED : 1997/10/19, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_purify_string (char * string, ushort options) +{ + int src, /* both indeces into string */ + dst; + int depth; /* brace depth in string */ + unsigned orig_len; + + /* + * Since purification always copies or deletes chars, outstr will + * be no longer than string -- so nothing fancy is required to put + * an upper bound on its eventual size. + */ + + depth = 0; + src = 0; + dst = 0; + orig_len = strlen (string); + + DBG_ACTION (1, printf ("bt_purify_string(): input = %p (%s)\n", + string, string)); + + while (string[src] != (char) 0) + { + DBG_ACTION (2, printf (" next: >%c<: ", string[src])); + switch (string[src]) + { + case '~': /* "separator" characters -- */ + case '-': /* replaced with space */ + case ' ': /* and copy an actual space */ + string[dst++] = ' '; + src++; + DBG_ACTION (2, printf ("replacing with space")); + break; + case '{': + if (depth == 0 && string[src+1] == '\\') + { + DBG_ACTION (2, printf ("special char found")); + purify_special_char (string, &src, &dst); + } + else + { + DBG_ACTION (2, printf ("ordinary open brace")); + src++; + } + depth++; + break; + case '}': + DBG_ACTION (2, printf ("close brace")); + depth--; + src++; + break; + default: + if (isalnum (string[src])) /* any alphanumeric char -- */ + { + DBG_ACTION (2, printf ("alphanumeric -- copying")); + string[dst++] = string[src++]; /* copy it */ + } + else /* anything else -- skip it */ + { + DBG_ACTION (2, printf 
("non-separator, non-brace, non-alpha")); + src++; + } + } /* switch string[src] */ + + DBG_ACTION (2, printf ("\n")); + + } /* while string[src] */ + + DBG_ACTION (1, printf ("bt_purify_string(): depth on exit: %d\n", depth)); + + string[dst] = (char) 0; + assert (strlen (string) <= orig_len); +} /* bt_purify_string() */ + + +/* ====================================================================== + * Case-transformation stuff + */ + + +/* ------------------------------------------------------------------------ +@NAME : convert_special_char() +@INPUT : transform +@INOUT : string + src + dst + start_sentence + after_colon +@RETURNS : +@DESCRIPTION: Does case conversion on a special character. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/11/25, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +convert_special_char (char transform, + char * string, + int * src, + int * dst, + boolean * start_sentence, + boolean * after_colon) +{ + int depth; + boolean done_special; + int cs_end; + int cs_len; /* counting the backslash */ + bt_letter letter; + const char * repl; + int repl_len; + +#ifndef ALLOW_WARNINGS + repl = NULL; /* silence "might be used" */ + /* uninitialized" warning */ +#endif + + /* First, copy just the opening brace */ + string[(*dst)++] = string[(*src)++]; + + /* + * Now loop over characters inside the braces -- stop when we reach + * the matching close brace, or when the string ends. + */ + depth = 1; /* because we're in a special char */ + done_special = FALSE; + + while (string[*src] != 0 && !done_special) + { + switch (string[*src]) + { + case '\\': /* a control sequence */ + { + cs_end = *src+1; /* scan over chars of c.s. */ + while (isalpha (string[cs_end])) + cs_end++; + + /* + * OK, now *src points to the backslash (so src+*1 points to + * first char. of control sequence), and cs_end points to + * character immediately following end of control sequence. 
+ * Thus we analyze [*src+1..cs_end] to determine if the control + * sequence is a foreign letter, and use (cs_end - (*src+1) + 1) + * = (cs_end - *src) as the length of the control sequence. + */ + + cs_len = cs_end - *src; /* length of cs, counting backslash */ + + if (foreign_letter (string, *src+1, cs_end, &letter)) + { + if (letter == L_OTHER) + internal_error ("impossible foreign letter"); + + switch (transform) + { + case 'u': + repl = uc_version[(int) letter]; + break; + case 'l': + repl = lc_version[(int) letter]; + break; + case 't': + if (*start_sentence || *after_colon) + { + repl = uc_version[(int) letter]; + *start_sentence = *after_colon = FALSE; + } + else + { + repl = lc_version[(int) letter]; + } + break; + default: + internal_error ("impossible case transform \"%c\"", + transform); + } + + repl_len = strlen (repl); + if (repl_len > cs_len) + internal_error + ("replacement text longer than original cs"); + + strncpy (string + *dst, repl, repl_len); + *src = cs_end; + *dst += repl_len; + } /* control sequence is a foreign letter */ + else + { + /* not a foreign letter -- just copy the control seq. as is */ + + + strncpy (string + *dst, string + *src, cs_end - *src); + *src += cs_len; + assert (*src == cs_end); + *dst += cs_len; + } /* control sequence not a foreign letter */ + + break; + } /* case: '\\' */ + + case '{': + { + string[(*dst)++] = string[(*src)++]; + depth++; + break; + } + + case '}': + { + string[(*dst)++] = string[(*src)++]; + depth--; + if (depth == 0) + done_special = TRUE; + break; + } + + default: /* any other character */ + { + switch (transform) + { + /* + * Inside special chars, lowercase and title caps are same. + * (At least, that's bibtex's convention. I might change this + * at some point to be a bit smarter.) 
+ */ + case 'l': + case 't': + string[(*dst)++] = tolower (string[(*src)++]); + break; + case 'u': + string[(*dst)++] = toupper (string[(*src)++]); + break; + default: + internal_error ("impossible case transform \"%c\"", + transform); + } + } /* default char */ + + } /* switch: current char */ + + } /* while: string or special char not done */ + +} /* convert_special_char() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_change_case() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Converts a string (in-place) to either uppercase, lowercase, + or "title capitalization"> +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/11/25, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_change_case (char transform, + char * string, + ushort options) +{ + int len; + int depth; + int src, dst; /* indeces into string */ + boolean start_sentence; + boolean after_colon; + + src = dst = 0; + len = strlen (string); + depth = 0; + + start_sentence = TRUE; + after_colon = FALSE; + + while (string[src] != 0) + { + switch (string[src]) + { + case '{': + + /* + * At start of special character? The entire special char. + * will be handled here, as follows: + * - text at any brace-depth within the s.c. is case-mangled; + * punctuation (sentence endings, colons) are ignored + * - control sequences are left alone, unless they are + * one of the "foreign letter" control sequences, in + * which case they're converted to the appropriate string + * according to the uc_version or lc_version tables. + */ + if (depth == 0 && string[src+1] == '\\') + { + convert_special_char (transform, string, &src, &dst, + &start_sentence, &after_colon); + } + + /* + * Otherwise, it's just something in braces. This is probably + * a proper noun or something encased in braces to protect it + * from case-mangling, so we do not case-mangle it. 
However, + * we *do* switch out of start_sentence or after_colon mode if + * we happen to be there (otherwise we'll do the wrong thing + * once we're out of the braces). + */ + else + { + string[dst++] = string[src++]; + start_sentence = after_colon = FALSE; + depth++; + } + break; + + case '}': + string[dst++] = string[src++]; + depth--; + break; + + /* + * Sentence-ending punctuation and colons are handled separately + * to allow for exact mimicing of BibTeX's behaviour. I happen + * to think that this behaviour (capitalize first word of sentences + * in a title) is better than BibTeX's, but I want to keep my + * options open for a future goal of perfect compatability. + */ + case '.': + case '?': + case '!': + start_sentence = TRUE; + string[dst++] = string[src++]; + break; + + case ':': + after_colon = TRUE; + string[dst++] = string[src++]; + break; + + default: + if (isspace (string[src])) + { + string[dst++] = string[src++]; + } + else + { + if (depth == 0) + { + switch (transform) + { + case 'u': + string[dst++] = toupper (string[src++]); + break; + case 'l': + string[dst++] = tolower (string[src++]); + break; + case 't': + if (start_sentence || after_colon) + { + /* + * XXX BibTeX only preserves case of character + * immediately after a colon; I do two things + * differently: first, I pay attention to sentence + * punctuation, and second I force uppercase + * at start of sentence or after a colon. 
+ */ + string[dst++] = toupper (string[src++]); + start_sentence = after_colon = FALSE; + } + else + { + string[dst++] = tolower (string[src++]); + } + break; + default: + internal_error ("impossible case transform \"%c\"", + transform); + } + } /* depth == 0 */ + else + { + string[dst++] = string[src++]; + } + } /* not blank */ + } /* switch on current character */ + + } /* while not at end of string */ + +} /* bt_change_case */ diff --git a/src/translators/btparse/sym.c b/src/translators/btparse/sym.c new file mode 100644 index 0000000..2426dea --- /dev/null +++ b/src/translators/btparse/sym.c @@ -0,0 +1,372 @@ +/* + * Simple symbol table manager using coalesced chaining to resolve collisions + * + * Doubly-linked lists are used for fast removal of entries. + * + * 'sym.h' must have a definition for typedef "Sym". Sym must include at + * minimum the following fields: + * + * ... + * char *symbol; + * struct ... *next, *prev, **head, *scope; + * unsigned int hash; + * ... + * + * 'template.h' can be used as a template to create a 'sym.h'. + * + * 'head' is &(table[hash(itself)]). + * The hash table is not resizable at run-time. + * The scope field is used to link all symbols of a current scope together. + * Scope() sets the current scope (linked list) to add symbols to. + * Any number of scopes can be handled. The user passes the address of + * a pointer to a symbol table + * entry (INITIALIZED TO NULL first time). + * + * Available Functions: + * + * zzs_init(s1,s2) -- Create hash table with size s1, string table size s2. + * zzs_done() -- Free hash and string table created with zzs_init(). + * zzs_add(key,rec)-- Add 'rec' with key 'key' to the symbol table. + * zzs_newadd(key) -- create entry; add using 'key' to the symbol table. + * zzs_get(key) -- Return pointer to last record entered under 'key' + * Else return NULL + * zzs_del(p) -- Unlink the entry associated with p. This does + * NOT free 'p' and DOES NOT remove it from a scope + * list. 
If it was a part of your intermediate code + * tree or another structure. It will still be there. + * It is only removed from further consideration + * by the symbol table. + * zzs_keydel(s) -- Unlink the entry associated with key s. + * Calls zzs_del(p) to unlink. + * zzs_scope(sc) -- Specifies that everything added to the symbol + * table with zzs_add() is added to the list (scope) + * 'sc'. 'sc' is of 'Sym **sc' type and must be + * initialized to NULL before trying to add anything + * to it (passing it to zzs_scope()). Scopes can be + * switched at any time and merely links a set of + * symbol table entries. If a NULL pointer is + * passed, the current scope is returned. + * zzs_rmscope(sc) -- Remove (zzs_del()) all elements of scope 'sc' + * from the symbol table. The entries are NOT + * free()'d. A pointer to the first + * element in the "scope" is returned. The user + * can then manipulate the list as he/she chooses + * (such as freeing them all). NOTE that this + * function sets your scope pointer to NULL, + * but returns a pointer to the list for you to use. + * zzs_stat() -- Print out the symbol table and some relevant stats. + * zzs_new(key) -- Create a new record with calloc() of type Sym. + * Add 'key' to the string table and make the new + * records 'symbol' pointer point to it. + * zzs_strdup(s) -- Add s to the string table and return a pointer + * to it. Very fast allocation routine + * and does not require strlen() nor calloc(). 
+ * + * Example: + * + * #include <stdio.h> + * #include "sym.h" + * + * main() + * { + * Sym *scope1=NULL, *scope2=NULL, *a, *p; + * + * zzs_init(101, 100); + * + * a = zzs_new("Apple"); zzs_add(a->symbol, a); -- No scope + * zzs_scope( &scope1 ); -- enter scope 1 + * a = zzs_new("Plum"); zzs_add(a->symbol, a); + * zzs_scope( &scope2 ); -- enter scope 2 + * a = zzs_new("Truck"); zzs_add(a->symbol, a); + * + * p = zzs_get("Plum"); + * if ( p == NULL ) fprintf(stderr, "Hmmm...Can't find 'Plum'\n"); + * + * p = zzs_rmscope(&scope1) + * for (; p!=NULL; p=p->scope) {printf("Scope1: %s\n", p->symbol);} + * p = zzs_rmscope(&scope2) + * for (; p!=NULL; p=p->scope) {printf("Scope2: %s\n", p->symbol);} + * } + * + * Terence Parr + * Purdue University + * February 1990 + * + * CHANGES + * + * Terence Parr + * May 1991 + * Renamed functions to be consistent with ANTLR + * Made HASH macro + * Added zzs_keydel() + * Added zzs_newadd() + * Fixed up zzs_stat() + * + * July 1991 + * Made symbol table entry save its hash code for fast comparison + * during searching etc... 
+ */ + +/*#include "bt_config.h"*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#ifdef MEMCHK +#include "trax.h" +#endif +#include "sym.h" +/*#include "my_dmalloc.h"*/ + +#define StrSame 0 + +static Sym **CurScope = NULL; +static unsigned size = 0; +static Sym **table=NULL; +static char *strings; +static char *strp; +static int strsize = 0; + +void +zzs_init(int sz, int strs) +{ + if ( sz <= 0 || strs <= 0 ) return; + table = (Sym **) calloc(sz, sizeof(Sym *)); + if ( table == NULL ) + { + fprintf(stderr, "Cannot allocate table of size %d\n", sz); + exit(1); + } + strings = (char *) calloc(strs, sizeof(char)); + if ( strings == NULL ) + { + fprintf(stderr, "Cannot allocate string table of size %d\n", strs); + exit(1); + } + size = sz; + strsize = strs; + strp = strings; +} + + +void +zzs_free(void) +{ + unsigned i; + Sym *cur, *next; + + for (i = 0; i < size; i++) + { + cur = table[i]; + while (cur != NULL) + { + next = cur->next; + free (cur); + cur = next; + } + } +} + + +void +zzs_done(void) +{ + if ( table != NULL ) free( table ); + if ( strings != NULL ) free( strings ); +} + +void +zzs_add(char *key, register Sym *rec) +{ + register unsigned int h=0; + register char *p=key; + + HASH_FUN(p, h); + rec->hash = h; /* save hash code for fast comp later */ + h %= size; + + if ( CurScope != NULL ) {rec->scope = *CurScope; *CurScope = rec;} + rec->next = table[h]; /* Add to doubly-linked list */ + rec->prev = NULL; + if ( rec->next != NULL ) (rec->next)->prev = rec; + table[h] = rec; + rec->head = &(table[h]); +} + +Sym * +zzs_get(char *key) +{ + register unsigned int h=0; + register char *p=key; + register Sym *q; + + HASH_FUN(p, h); + + for (q = table[h%size]; q != NULL; q = q->next) + { + if ( q->hash == h ) /* do we even have a chance of matching? */ + if ( strcasecmp(key, q->symbol) == StrSame ) return( q ); + } + return( NULL ); +} + +/* + * Unlink p from the symbol table. Hopefully, it's actually in the + * symbol table. 
+ * + * If p is not part of a bucket chain of the symbol table, bad things + * will happen. + * + * Will do nothing if all list pointers are NULL + */ +void +zzs_del(register Sym *p) +{ + if ( p == NULL ) {fprintf(stderr, "zzs_del(NULL)\n"); exit(1);} + if ( p->prev == NULL ) /* Head of list */ + { + register Sym **t = p->head; + + if ( t == NULL ) return; /* not part of symbol table */ + (*t) = p->next; + if ( (*t) != NULL ) (*t)->prev = NULL; + } + else + { + (p->prev)->next = p->next; + if ( p->next != NULL ) (p->next)->prev = p->prev; + } + p->next = p->prev = NULL; /* not part of symbol table anymore */ + p->head = NULL; +} + +void +zzs_keydel(char *key) +{ + Sym *p = zzs_get(key); + + if ( p != NULL ) zzs_del( p ); +} + +/* S c o p e S t u f f */ + +/* Set current scope to 'scope'; return current scope if 'scope' == NULL */ +Sym ** +zzs_scope(Sym **scope) +{ + if ( scope == NULL ) return( CurScope ); + CurScope = scope; + return( scope ); +} + +/* Remove a scope described by 'scope'. 
Return pointer to 1st element in scope */ +Sym * +zzs_rmscope(register Sym **scope) +{ + register Sym *p; + Sym *start; + + if ( scope == NULL ) return(NULL); + start = p = *scope; + for (; p != NULL; p=p->scope) { zzs_del( p ); } + *scope = NULL; + return( start ); +} + +void +zzs_stat(void) +{ + static unsigned short count[20]; + unsigned int i,n=0,low=0, hi=0; + register Sym **p; + float avg=0.0; + + for (i=0; i<20; i++) count[i] = 0; + for (p=table; p<&(table[size]); p++) + { + register Sym *q = *p; + unsigned int len; + + if ( q != NULL && low==0 ) low = p-table; + len = 0; + if ( q != NULL ) printf("[%d]", p-table); + while ( q != NULL ) + { + len++; + n++; + printf(" %s", q->symbol); + q = q->next; + if ( q == NULL ) printf("\n"); + } + if ( len>=20 ) printf("zzs_stat: count table too small\n"); + else count[len]++; + if ( *p != NULL ) hi = p-table; + } + + printf("Storing %d recs used %d hash positions out of %d\n", + n, size-count[0], size); + printf("%f %% utilization\n", + ((float)(size-count[0]))/((float)size)); + for (i=0; i<20; i++) + { + if ( count[i] != 0 ) + { + avg += (((float)(i*count[i]))/((float)n)) * i; + printf("Buckets of len %d == %d (%f %% of recs)\n", + i, count[i], 100.0*((float)(i*count[i]))/((float)n)); + } + } + printf("Avg bucket length %f\n", avg); + printf("Range of hash function: %d..%d\n", low, hi); +} + +/* + * Given a string, this function allocates and returns a pointer to a + * symbol table record whose "symbol" pointer is reset to a position + * in the string table. 
+ */ +Sym * +zzs_new(char *text) +{ + Sym *p; + char *zzs_strdup(register char *s); + + if ( (p = (Sym *) calloc(1,sizeof(Sym))) == 0 ) + { + fprintf(stderr,"Out of memory\n"); + exit(1); + } + p->symbol = zzs_strdup(text); + + return p; +} + +/* create a new symbol table entry and add it to the symbol table */ +Sym * +zzs_newadd(char *text) +{ + Sym *p = zzs_new(text); + if ( p != NULL ) zzs_add(text, p); + return p; +} + +/* Add a string to the string table and return a pointer to it. + * Bump the pointer into the string table to next avail position. + */ +char * +zzs_strdup(register char *s) +{ + register char *start=strp; + + while ( *s != '\0' ) + { + if ( strp >= &(strings[strsize-2]) ) + { + fprintf(stderr, "sym: string table overflow (%d chars)\n", strsize); + exit(-1); + } + *strp++ = *s++; + } + *strp++ = '\0'; + + return( start ); +} diff --git a/src/translators/btparse/sym.h b/src/translators/btparse/sym.h new file mode 100644 index 0000000..78983d1 --- /dev/null +++ b/src/translators/btparse/sym.h @@ -0,0 +1,33 @@ +#include <ctype.h> + +/* + * Declarations for symbol table in sym.c + */ + +/* define some hash function */ +#ifndef HASH_FUN +#define HASH_FUN(p, h) while ( *p != '\0' ) h = (h<<1) + tolower (*p++); +#endif + +/* minimum symbol table record */ +typedef struct _sym +{ + char *symbol; /* the macro name */ + char *text; /* its expansion */ + struct _sym *next, *prev, **head, *scope; + unsigned int hash; +} Sym, *SymPtr; + +void zzs_init(int, int); +void zzs_free(void); +void zzs_done(void); +void zzs_add(char *, Sym *); +Sym *zzs_get(char *); +void zzs_del(Sym *); +void zzs_keydel(char *); +Sym **zzs_scope(Sym **); +Sym *zzs_rmscope(Sym **); +void zzs_stat(void); +Sym *zzs_new(char *); +Sym *zzs_newadd(char *); +char *zzs_strdup(char *); diff --git a/src/translators/btparse/tex_tree.c b/src/translators/btparse/tex_tree.c new file mode 100644 index 0000000..0d7d33d --- /dev/null +++ b/src/translators/btparse/tex_tree.c @@ -0,0 +1,414 @@ +/* 
------------------------------------------------------------------------ +@NAME : tex_tree.c +@DESCRIPTION: Functions for dealing with strings of TeX code: converting + them to tree representation, traversing the trees to glean + useful information, and converting back to string form. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, Greg Ward +@MODIFIED : +@VERSION : $Id: tex_tree.c,v 1.4 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "error.h" +#include "btparse.h" +/*#include "my_dmalloc.h"*/ + +/* blech! temp hack until I make error.c perfect and magical */ +#define string_warning(w) fprintf (stderr, w); + +typedef struct treestack_s +{ + bt_tex_tree * node; + struct treestack_s + * prev, + * next; +} treestack; + + +/* ---------------------------------------------------------------------- + * Stack manipulation functions + */ + +/* ------------------------------------------------------------------------ +@NAME : push_treestack() +@INPUT : *stack + node +@OUTPUT : *stack +@RETURNS : +@DESCRIPTION: Creates and initializes new node in a stack, and pushes it + onto the stack. 
+@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +push_treestack (treestack **stack, bt_tex_tree *node) +{ + treestack *newtop; + + newtop = (treestack *) malloc (sizeof (treestack)); + newtop->node = node; + newtop->next = NULL; + newtop->prev = *stack; + + if (*stack != NULL) /* stack already has some entries */ + { + (*stack)->next = newtop; + *stack = newtop; + } + + *stack = newtop; + +} /* push_treestack() */ + + +/* ------------------------------------------------------------------------ +@NAME : pop_treestack +@INPUT : *stack +@OUTPUT : *stack +@RETURNS : +@DESCRIPTION: Pops an entry off of a stack of tex_tree nodes, frees up + the wrapper treestack node, and returns the popped tree node. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static bt_tex_tree * +pop_treestack (treestack **stack) +{ + treestack * oldtop; + bt_tex_tree * node; + + if (*stack == NULL) + internal_error ("attempt to pop off empty stack"); + oldtop = (*stack)->prev; + node = (*stack)->node; + free (*stack); + if (oldtop != NULL) + oldtop->next = NULL; + *stack = oldtop; + return node; + +} /* pop_treestack() */ + + +/* ---------------------------------------------------------------------- + * Tree creation/destruction functions + */ + +/* ------------------------------------------------------------------------ +@NAME : new_tex_tree +@INPUT : start +@OUTPUT : +@RETURNS : pointer to newly-allocated node +@DESCRIPTION: Allocates and initializes a bt_tex_tree node. 
+@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static bt_tex_tree * +new_tex_tree (char *start) +{ + bt_tex_tree * node; + + node = (bt_tex_tree *) malloc (sizeof (bt_tex_tree)); + node->start = start; + node->len = 0; + node->child = node->next = NULL; + return node; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_build_tex_tree +@INPUT : string +@OUTPUT : +@RETURNS : pointer to a complete tree; call bt_free_tex_tree() to free + the entire tree +@DESCRIPTION: Traverses a string looking for TeX groups ({...}), and builds + a tree containing pointers into the string and describing + its brace-structure. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +bt_tex_tree * +bt_build_tex_tree (char * string) +{ + int i; + int depth; + int len; + bt_tex_tree + * top, + * cur, + * new; + treestack + * stack; + + i = 0; + depth = 0; + len = strlen (string); + top = new_tex_tree (string); + stack = NULL; + + cur = top; + + while (i < len) + { + switch (string[i]) + { + case '{': /* go one level deeper */ + { + if (i == len-1) /* open brace in last character? */ + { + string_warning ("unbalanced braces: { at end of string"); + goto error; + } + + new = new_tex_tree (string+i+1); + cur->child = new; + push_treestack (&stack, cur); + cur = new; + depth++; + break; + } + case '}': /* pop level(s) off */ + { + while (i < len && string[i] == '}') + { + if (stack == NULL) + { + string_warning ("unbalanced braces: extra }"); + goto error; + } + cur = pop_treestack (&stack); + depth--; + i++; + } + i--; + + if (i == len-1) /* reached end of string? 
*/ + { + if (depth > 0) /* but not at depth 0 */ + { + string_warning ("unbalanced braces: not enough }'s"); + goto error; + } + + /* + * if we get here, do nothing -- we've reached the end of + * the string and are at depth 0, so will just fall out + * of the while loop at the end of this iteration + */ + } + else /* still have characters left */ + { /* to worry about */ + new = new_tex_tree (string+i+1); + cur->next = new; + cur = new; + } + + break; + } + default: + { + cur->len++; + } + + } /* switch */ + + i++; + + } /* while i */ + + if (depth > 0) + { + string_warning ("unbalanced braces (not enough }'s)"); + goto error; + } + + return top; + +error: + bt_free_tex_tree (&top); + return NULL; + +} /* bt_build_tex_tree() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_tex_tree +@INPUT : *top +@OUTPUT : *top (set to NULL after it's free()'d) +@RETURNS : +@DESCRIPTION: Frees up an entire tree created by bt_build_tex_tree(). +@GLOBALS : +@CALLS : itself, free() +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_free_tex_tree (bt_tex_tree **top) +{ + if ((*top)->child) bt_free_tex_tree (&(*top)->child); + if ((*top)->next) bt_free_tex_tree (&(*top)->next); + free (*top); + *top = NULL; +} + + + +/* ---------------------------------------------------------------------- + * Tree traversal functions + */ + +/* ------------------------------------------------------------------------ +@NAME : bt_dump_tex_tree +@INPUT : node + depth + stream +@OUTPUT : +@RETURNS : +@DESCRIPTION: Dumps a TeX tree: one node per line, depth indented according + to depth. 
+@GLOBALS : +@CALLS : itself +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_dump_tex_tree (bt_tex_tree *node, int depth, FILE *stream) +{ + char buf[256]; + + if (node == NULL) + return; + + if (node->len > 255) + internal_error ("augughgh! buf too small"); + strncpy (buf, node->start, node->len); + buf[node->len] = (char) 0; + + fprintf (stream, "%*s[%s]\n", depth*2, "", buf); + + bt_dump_tex_tree (node->child, depth+1, stream); + bt_dump_tex_tree (node->next, depth, stream); + +} + + +/* ------------------------------------------------------------------------ +@NAME : count_length +@INPUT : node +@OUTPUT : +@RETURNS : +@DESCRIPTION: Counts the total number of characters that will be needed + to print a string reconstructed from a TeX tree. (Length + of string in each node, plus two [{ and }] for each down + edge.) +@GLOBALS : +@CALLS : itself +@CALLERS : bt_flatten_tex_tree +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +count_length (bt_tex_tree *node) +{ + if (node == NULL) return 0; + return + node->len + + (node->child ? 2 : 0) + + count_length (node->child) + + count_length (node->next); +} + + +/* ------------------------------------------------------------------------ +@NAME : flatten_tree +@INPUT : node + *offset +@OUTPUT : *buf + *offset +@RETURNS : +@DESCRIPTION: Dumps a reconstructed string ("flat" representation of the + tree) into a pre-allocated buffer, starting at a specified + offset. 
+@GLOBALS : +@CALLS : itself +@CALLERS : bt_flatten_tex_tree +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +flatten_tree (bt_tex_tree *node, char *buf, int *offset) +{ + strncpy (buf + *offset, node->start, node->len); + *offset += node->len; + + if (node->child) + { + buf[(*offset)++] = '{'; + flatten_tree (node->child, buf, offset); + buf[(*offset)++] = '}'; + } + + if (node->next) + { + flatten_tree (node->next, buf, offset); + } +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_flatten_tex_tree +@INPUT : top +@OUTPUT : +@RETURNS : flattened string representation of the tree (as a string + allocated with malloc(), so you should free() it when + you're done with it) +@DESCRIPTION: Counts the number of characters needed for a "flat" + string representation of a tree, allocates a string of + that size, and generates the string. +@GLOBALS : +@CALLS : count_length, flatten_tree +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +char * +bt_flatten_tex_tree (bt_tex_tree *top) +{ + int len; + int offset; + char * buf; + + len = count_length (top); + buf = (char *) malloc (sizeof (char) * (len+1)); + offset = 0; + flatten_tree (top, buf, &offset); + return buf; +} diff --git a/src/translators/btparse/tokens.h b/src/translators/btparse/tokens.h new file mode 100644 index 0000000..6f9405a --- /dev/null +++ b/src/translators/btparse/tokens.h @@ -0,0 +1,41 @@ +#ifndef tokens_h +#define tokens_h +/* tokens.h -- List of labelled tokens and stuff + * + * Generated from: bibtex.g + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * ANTLR Version 1.33 + */ +#define zzEOF_TOKEN 1 +#define AT 2 +#define COMMENT 4 +#define NUMBER 9 +#define NAME 10 +#define LBRACE 11 +#define RBRACE 12 +#define ENTRY_OPEN 13 +#define 
ENTRY_CLOSE 14 +#define EQUALS 15 +#define HASH 16 +#define COMMA 17 +#define STRING 25 + +void bibfile(AST**_root); +void entry(AST**_root); +void body(AST**_root, bt_metatype metatype ); +void contents(AST**_root, bt_metatype metatype ); +void fields(AST**_root); +void field(AST**_root); +void value(AST**_root); +void simple_value(AST**_root); + +#endif +extern SetWordType zzerr1[]; +extern SetWordType zzerr2[]; +extern SetWordType zzerr3[]; +extern SetWordType zzerr4[]; +extern SetWordType setwd1[]; +extern SetWordType zzerr5[]; +extern SetWordType setwd2[]; diff --git a/src/translators/btparse/traversal.c b/src/translators/btparse/traversal.c new file mode 100644 index 0000000..c7e10a2 --- /dev/null +++ b/src/translators/btparse/traversal.c @@ -0,0 +1,187 @@ +/* ------------------------------------------------------------------------ +@NAME : traversal.c +@DESCRIPTION: Routines for traversing the AST for a single entry. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/21, Greg Ward +@MODIFIED : +@VERSION : $Id: traversal.c,v 1.17 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include "btparse.h" +#include "parse_auxiliary.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + + +AST *bt_next_entry (AST *entry_list, AST *prev_entry) +{ + if (entry_list == NULL || entry_list->nodetype != BTAST_ENTRY) + return NULL; + + if (prev_entry) + { + if (prev_entry->nodetype != BTAST_ENTRY) + return NULL; + else + return prev_entry->right; + } + else + return entry_list; +} + + +bt_metatype bt_entry_metatype (AST *entry) +{ + if (!entry) return BTE_UNKNOWN; + if (entry->nodetype != BTAST_ENTRY) + return BTE_UNKNOWN; + else + return entry->metatype; +} + + +char *bt_entry_type (AST *entry) +{ + if (!entry) return NULL; + if (entry->nodetype != BTAST_ENTRY) + return NULL; + else + return entry->text; +} + + +char *bt_entry_key (AST *entry) +{ + if (entry->metatype == BTE_REGULAR && + entry->down && entry->down->nodetype == BTAST_KEY) + { + return entry->down->text; + } + else + { + return NULL; + } +} + + +AST *bt_next_field (AST *entry, AST *prev, char **name) +{ + AST *field; + bt_metatype metatype; + + *name = NULL; + if (!entry || !entry->down) return NULL; /* protect against empty entry */ + + metatype = entry->metatype; + if (metatype != BTE_MACRODEF && metatype != BTE_REGULAR) + return NULL; + + if (prev == NULL) /* no previous field -- they must */ + { /* want the first one */ + field = entry->down; + if (metatype == BTE_REGULAR && field->nodetype == BTAST_KEY) + field = field->right; /* skip over citation key if present */ + } + else /* they really do want the next one */ + { + field = prev->right; + } + + if (!field) return NULL; /* protect against field-less entry */ + if (name) *name = field->text; + return field; +} /* bt_next_field() */ + + +AST *bt_next_macro (AST *entry, AST *prev, char **name) +{ + return bt_next_field (entry, prev, name); +} + + +AST *bt_next_value (AST *top, AST *prev, bt_nodetype 
*nodetype, char **text) +{ + bt_nodetype nt; /* type of `top' node (to check) */ + bt_metatype mt; + AST * value; + + if (nodetype) *nodetype = BTAST_BOGUS; + if (text) *text = NULL; + + if (!top) return NULL; + /* get_node_type (top, &nt, &mt); */ + nt = top->nodetype; + mt = top->metatype; + + if ((nt == BTAST_FIELD) || + (nt == BTAST_ENTRY && (mt == BTE_COMMENT || mt == BTE_PREAMBLE))) + { + if (prev == NULL) /* no previous value -- give 'em */ + { /* the first one */ + value = top->down; + if (!value) return NULL; + if (nodetype) *nodetype = value->nodetype; + } + else + { + value = prev->right; + if (!value) return NULL; + if (nodetype) *nodetype = value->nodetype; + } + + if (nt == BTAST_ENTRY && value->nodetype != BTAST_STRING) + internal_error ("found comment or preamble with non-string value"); + } + else + { + value = NULL; + } + + if (text && value) *text = value->text; + + return value; +} /* bt_next_value() */ + + +char *bt_get_text (AST *node) +{ + ushort pp_options = BTO_FULL; /* options for full processing: */ + /* expand macros, paste strings, */ + /* collapse whitespace */ + bt_nodetype nt; + bt_metatype mt; + + nt = node->nodetype; + mt = node->metatype; + + if (nt == BTAST_FIELD) + { +#if DEBUG + char *value; + + dump_ast ("bt_get_text (pre): node =\n", node); + value = bt_postprocess_field (node, pp_options, FALSE); + dump_ast ("bt_get_text (post): node =\n", node); + return value; +#else + return bt_postprocess_field (node, pp_options, FALSE); +#endif + } + else if (nt == BTAST_ENTRY && (mt == BTE_COMMENT || mt == BTE_PREAMBLE)) + { + return bt_postprocess_value (node->down, pp_options, FALSE); + } + else + { + return NULL; + } +} diff --git a/src/translators/btparse/util.c b/src/translators/btparse/util.c new file mode 100644 index 0000000..1330176 --- /dev/null +++ b/src/translators/btparse/util.c @@ -0,0 +1,79 @@ +/* ------------------------------------------------------------------------ +@NAME : util.c +@INPUT : +@OUTPUT : +@RETURNS : 
+@DESCRIPTION: Miscellaneous utility functions. So far, just: + strlwr + strupr +@CREATED : Summer 1996, Greg Ward +@MODIFIED : +@VERSION : $Id: util.c,v 1.6 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <string.h> +#include <ctype.h> +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + +/* ------------------------------------------------------------------------ +@NAME : strlwr() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Converts a string to lowercase in place. +@GLOBALS : +@CALLS : +@CREATED : 1996/01/06, GPW +@MODIFIED : +@COMMENTS : This should work the same as strlwr() in DOS compilers -- + why this isn't mandated by ANSI is a mystery to me... +-------------------------------------------------------------------------- */ +#if !HAVE_STRLWR +char *strlwr (char *s) +{ + int len, i; + + len = strlen (s); + for (i = 0; i < len; i++) + s[i] = tolower (s[i]); + + return s; +} +#endif + + + +/* ------------------------------------------------------------------------ +@NAME : strupr() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Converts a string to uppercase in place. +@GLOBALS : +@CALLS : +@CREATED : 1996/01/06, GPW +@MODIFIED : +@COMMENTS : This should work the same as strupr() in DOS compilers -- + why this isn't mandated by ANSI is a mystery to me... +-------------------------------------------------------------------------- */ +#if !HAVE_STRUPR +char *strupr (char *s) +{ + int len, i; + + len = strlen (s); + for (i = 0; i < len; i++) + s[i] = toupper (s[i]); + + return s; +} +#endif |