summaryrefslogtreecommitdiffstats
path: root/src/translators/btparse
diff options
context:
space:
mode:
authortpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2010-03-01 19:17:32 +0000
committertpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2010-03-01 19:17:32 +0000
commite38d2351b83fa65c66ccde443777647ef5cb6cff (patch)
tree1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/translators/btparse
downloadtellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.tar.gz
tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.zip
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/translators/btparse')
-rw-r--r--src/translators/btparse/Makefile.am18
-rw-r--r--src/translators/btparse/antlr.h561
-rw-r--r--src/translators/btparse/ast.c227
-rw-r--r--src/translators/btparse/ast.h99
-rw-r--r--src/translators/btparse/attrib.h35
-rw-r--r--src/translators/btparse/bibtex.c312
-rw-r--r--src/translators/btparse/bibtex_ast.c63
-rw-r--r--src/translators/btparse/bt_debug.h38
-rw-r--r--src/translators/btparse/btconfig.h220
-rw-r--r--src/translators/btparse/btparse.h378
-rw-r--r--src/translators/btparse/dlgauto.h408
-rw-r--r--src/translators/btparse/dlgdef.h97
-rw-r--r--src/translators/btparse/err.c75
-rw-r--r--src/translators/btparse/err.h700
-rw-r--r--src/translators/btparse/error.c348
-rw-r--r--src/translators/btparse/error.h65
-rw-r--r--src/translators/btparse/format_name.c841
-rw-r--r--src/translators/btparse/init.c42
-rw-r--r--src/translators/btparse/input.c499
-rw-r--r--src/translators/btparse/lex_auxiliary.c939
-rw-r--r--src/translators/btparse/lex_auxiliary.h71
-rw-r--r--src/translators/btparse/macros.c367
-rw-r--r--src/translators/btparse/mode.h3
-rw-r--r--src/translators/btparse/modify.c75
-rw-r--r--src/translators/btparse/my_alloca.h35
-rw-r--r--src/translators/btparse/names.c915
-rw-r--r--src/translators/btparse/parse_auxiliary.c336
-rw-r--r--src/translators/btparse/parse_auxiliary.h32
-rw-r--r--src/translators/btparse/postprocess.c498
-rw-r--r--src/translators/btparse/prototypes.h47
-rw-r--r--src/translators/btparse/scan.c615
-rw-r--r--src/translators/btparse/stdpccts.h31
-rw-r--r--src/translators/btparse/string_util.c695
-rw-r--r--src/translators/btparse/sym.c372
-rw-r--r--src/translators/btparse/sym.h33
-rw-r--r--src/translators/btparse/tex_tree.c414
-rw-r--r--src/translators/btparse/tokens.h41
-rw-r--r--src/translators/btparse/traversal.c187
-rw-r--r--src/translators/btparse/util.c79
39 files changed, 10811 insertions, 0 deletions
diff --git a/src/translators/btparse/Makefile.am b/src/translators/btparse/Makefile.am
new file mode 100644
index 0000000..84af63b
--- /dev/null
+++ b/src/translators/btparse/Makefile.am
@@ -0,0 +1,18 @@
+####### kdevelop will overwrite this part!!! (begin)##########
+if !USE_LIBBTPARSE
+
+noinst_LIBRARIES = libbtparse.a
+
+AM_CPPFLAGS = $(all_includes)
+
+libbtparse_a_METASOURCES = AUTO
+
+libbtparse_a_SOURCES = bibtex_ast.c bibtex.c err.c ast.c scan.c util.c lex_auxiliary.c parse_auxiliary.c format_name.c string_util.c tex_tree.c names.c modify.c traversal.c sym.c macros.c error.c postprocess.c input.c init.c
+
+endif
+
+EXTRA_DIST = btparse.h init.c stdpccts.h attrib.h lex_auxiliary.h error.h parse_auxiliary.h prototypes.h tokens.h mode.h input.c postprocess.c error.c macros.c sym.h sym.c bt_debug.h traversal.c modify.c names.c my_alloca.h tex_tree.c string_util.c format_name.c antlr.h ast.h btconfig.h dlgdef.h parse_auxiliary.c lex_auxiliary.c util.c scan.c dlgauto.h ast.c err.h err.c bibtex.c bibtex_ast.c
+
+####### kdevelop will overwrite this part!!! (end)############
+
+KDE_OPTIONS = noautodist
diff --git a/src/translators/btparse/antlr.h b/src/translators/btparse/antlr.h
new file mode 100644
index 0000000..f52aba6
--- /dev/null
+++ b/src/translators/btparse/antlr.h
@@ -0,0 +1,561 @@
+/* antlr.h
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+#ifndef ANTLR_H
+#define ANTLR_H
+
+#include "btconfig.h"
+
+/*
+ * Define all of the stack setup and manipulation of $i, #i variables.
+ *
+ * Notes:
+ * The type 'Attrib' must be defined before entry into this .h file.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+typedef int ANTLRTokenType;
+typedef unsigned char SetWordType;
+
+typedef char ANTLRChar;
+
+ /* G u e s s S t u f f */
+
+#ifdef ZZCAN_GUESS
+#ifndef ZZINF_LOOK
+#define ZZINF_LOOK
+#endif
+#endif
+
+#ifdef ZZCAN_GUESS
+typedef struct _zzjmp_buf {
+ jmp_buf state;
+ } zzjmp_buf;
+#endif
+
+
+/* can make this a power of 2 for more efficient lookup */
+#ifndef ZZLEXBUFSIZE
+#define ZZLEXBUFSIZE 2000
+#endif
+
+#define zzOvfChk \
+ if ( zzasp <= 0 ) \
+ { \
+ fprintf(stderr, zzStackOvfMsg, __FILE__, __LINE__); \
+ exit(PCCTS_EXIT_FAILURE); \
+ }
+
+#ifndef ZZA_STACKSIZE
+#define ZZA_STACKSIZE 400
+#endif
+#ifndef ZZAST_STACKSIZE
+#define ZZAST_STACKSIZE 400
+#endif
+
+#ifndef zzfailed_pred
+#define zzfailed_pred(_p) \
+ fprintf(stderr, "semantic error; failed predicate: '%s'\n",_p)
+#endif
+
+#ifdef LL_K
+#define LOOKAHEAD \
+ int zztokenLA[LL_K]; \
+ char zztextLA[LL_K][ZZLEXBUFSIZE]; \
+ int zzlap = 0, zzlabase=0; /* labase only used for DEMAND_LOOK */
+#else
+#define LOOKAHEAD \
+ int zztoken;
+#endif
+
+#ifndef zzcr_ast
+#define zzcr_ast(ast,attr,tok,text)
+#endif
+
+#ifdef DEMAND_LOOK
+#define DemandLookData int zzdirty=1;
+#else
+#define DemandLookData
+#endif
+
+ /* S t a t e S t u f f */
+
+#ifdef ZZCAN_GUESS
+#define zzGUESS_BLOCK zzantlr_state zzst; int zzrv;
+#define zzGUESS zzsave_antlr_state(&zzst); \
+ zzguessing = 1; \
+ zzrv = setjmp(zzguess_start.state);
+#define zzGUESS_FAIL longjmp(zzguess_start.state, 1)
+#define zzGUESS_DONE zzrestore_antlr_state(&zzst);
+#define zzNON_GUESS_MODE if ( !zzguessing )
+#define zzGuessData \
+ zzjmp_buf zzguess_start; \
+ int zzguessing;
+#else
+#define zzGUESS_BLOCK
+#define zzGUESS
+#define zzGUESS_FAIL
+#define zzGUESS_DONE
+#define zzNON_GUESS_MODE
+#define zzGuessData
+#endif
+
+typedef struct _zzantlr_state {
+#ifdef ZZCAN_GUESS
+ zzjmp_buf guess_start;
+ int guessing;
+#endif
+ int asp;
+ int ast_sp;
+#ifdef ZZINF_LOOK
+ int inf_lap; /* not sure we need to save this one */
+ int inf_labase;
+ int inf_last;
+#endif
+#ifdef DEMAND_LOOK
+ int dirty;
+#endif
+
+#ifdef LL_K
+ int tokenLA[LL_K];
+ char textLA[LL_K][ZZLEXBUFSIZE];
+ int lap;
+ int labase;
+#else
+ int token;
+ char text[ZZLEXBUFSIZE];
+#endif
+ } zzantlr_state;
+
+
+ /* I n f i n i t e L o o k a h e a d */
+
+
+#ifdef ZZINF_LOOK
+#define InfLookData \
+ int *zzinf_tokens; \
+ char **zzinf_text; \
+ char *zzinf_text_buffer; \
+ int *zzinf_line; \
+ int zzinf_labase; \
+ int zzinf_last;
+#else
+#define InfLookData
+#endif
+
+#ifdef ZZINF_LOOK
+
+#ifndef ZZINF_DEF_TEXT_BUFFER_SIZE
+#define ZZINF_DEF_TEXT_BUFFER_SIZE 20000
+#endif
+#ifndef ZZINF_DEF_TOKEN_BUFFER_SIZE
+#define ZZINF_DEF_TOKEN_BUFFER_SIZE 2000
+#endif
+/* WARNING!!!!!!
+ * ZZINF_BUFFER_TEXT_CHUNK_SIZE must be > sizeof(text) largest possible token.
+ */
+#ifndef ZZINF_BUFFER_TEXT_CHUNK_SIZE
+#define ZZINF_BUFFER_TEXT_CHUNK_SIZE 5000
+#endif
+#ifndef ZZINF_BUFFER_TOKEN_CHUNK_SIZE
+#define ZZINF_BUFFER_TOKEN_CHUNK_SIZE 1000
+#endif
+
+/* Enlarge the text chunk when the lexer buffer is bigger than it, so a
+ * maximum-length token still fits in a single chunk.  The previous
+ * definition is removed first because a macro may not be redefined with
+ * a different body; the parentheses keep the sum intact when the macro
+ * is used inside a larger expression.
+ */
+#if ZZLEXBUFSIZE > ZZINF_BUFFER_TEXT_CHUNK_SIZE
+#undef ZZINF_BUFFER_TEXT_CHUNK_SIZE
+#define ZZINF_BUFFER_TEXT_CHUNK_SIZE (ZZLEXBUFSIZE+5)
+#endif
+
+/* make inf_look user-access macros */
+#ifdef LL_K
+#define ZZINF_LA_VALID(i) (((zzinf_labase+i-1)-LL_K+1) <= zzinf_last)
+#define ZZINF_LA(i) zzinf_tokens[(zzinf_labase+i-1)-LL_K+1]
+#define ZZINF_LATEXT(i) zzinf_text[(zzinf_labase+i-1)-LL_K+1]
+/* #define ZZINF_LINE(i) zzinf_line[(zzinf_labase+i-1)-LL_K+1]*/
+#else
+#define ZZINF_LA_VALID(i) (((zzinf_labase+i-1)) <= zzinf_last)
+#define ZZINF_LA(i) zzinf_tokens[(zzinf_labase+i-1)]
+#define ZZINF_LATEXT(i) zzinf_text[(zzinf_labase+i-1)]
+#endif
+
+#define inf_zzgettok _inf_zzgettok()
+extern void _inf_zzgettok();
+
+#endif /* ZZINF_LOOK */
+
+
+#ifdef LL_K
+
+#define ANTLR_INFO \
+ Attrib zzempty_attr(void) {static Attrib a; return a;} \
+ Attrib zzconstr_attr(int _tok, char *_text)\
+ {Attrib a; zzcr_attr((&a),_tok,_text); return a;} \
+ int zzasp=ZZA_STACKSIZE; \
+ char zzStackOvfMsg[]="fatal: attrib/AST stack overflow %s(%d)!\n"; \
+ Attrib zzaStack[ZZA_STACKSIZE]; DemandLookData \
+ InfLookData \
+ zzGuessData
+
+#else
+
+#define ANTLR_INFO \
+ Attrib zzempty_attr(void) {static Attrib a; return a;} \
+ Attrib zzconstr_attr(int _tok, char *_text)\
+ {Attrib a; zzcr_attr((&a),_tok,_text); return a;} \
+ int zzasp=ZZA_STACKSIZE; \
+ char zzStackOvfMsg[]="fatal: attrib/AST stack overflow %s(%d)!\n"; \
+ Attrib zzaStack[ZZA_STACKSIZE]; DemandLookData \
+ InfLookData \
+ zzGuessData
+
+#endif /* LL_k */
+
+
+#ifdef ZZINF_LOOK
+
+#ifdef LL_K
+#ifdef DEMAND_LOOK
+#define zzPrimeLookAhead {zzdirty=LL_K; zzlap = zzlabase = 0;}
+#else
+#define zzPrimeLookAhead {zzlap = zzlabase = 0; zzfill_inf_look();\
+ {int _i; for(_i=1;_i<=LL_K; _i++) \
+ {zzCONSUME;} zzlap = zzlabase = 0;}}
+#endif
+
+#else /* LL_K */
+
+#ifdef DEMAND_LOOK
+#define zzPrimeLookAhead zzfill_inf_look(); zzdirty=1
+#else
+#define zzPrimeLookAhead zzfill_inf_look(); inf_zzgettok
+
+#endif
+#endif /* LL_K */
+
+#else /* ZZINF_LOOK */
+
+#ifdef LL_K
+#ifdef DEMAND_LOOK
+#define zzPrimeLookAhead {zzdirty=LL_K; zzlap = zzlabase = 0;}
+#else
+#define zzPrimeLookAhead {int _i; zzlap = 0; for(_i=1;_i<=LL_K; _i++) \
+ {zzCONSUME;} zzlap = 0;}
+#endif
+
+#else
+
+#ifdef DEMAND_LOOK
+#define zzPrimeLookAhead zzdirty=1
+#else
+#define zzPrimeLookAhead zzgettok()
+#endif
+#endif /* LL_K */
+
+#endif /* ZZINF_LOOK */
+
+
+#ifdef LL_K
+#define zzenterANTLRs(s) \
+ zzlextext = &(zztextLA[0][0]); zzrdstr( s ); zzPrimeLookAhead;
+#define zzenterANTLRf(f) \
+ zzlextext = &(zztextLA[0][0]); zzrdfunc( f ); zzPrimeLookAhead;
+#define zzenterANTLR(f) \
+ zzlextext = &(zztextLA[0][0]); zzrdstream( f ); zzPrimeLookAhead;
+#ifdef ZZINF_LOOK
+#define zzleaveANTLR(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line);
+#define zzleaveANTLRf(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line);
+#define zzleaveANTLRs(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line);
+#else
+#define zzleaveANTLR(f)
+#define zzleaveANTLRf(f)
+#define zzleaveANTLRs(f)
+#endif
+
+#else
+
+#define zzenterANTLRs(s) \
+ {static char zztoktext[ZZLEXBUFSIZE]; \
+ zzlextext = zztoktext; zzrdstr( s ); zzPrimeLookAhead;}
+#define zzenterANTLRf(f) \
+ {static char zztoktext[ZZLEXBUFSIZE]; \
+ zzlextext = zztoktext; zzrdfunc( f ); zzPrimeLookAhead;}
+#define zzenterANTLR(f) \
+ {static char zztoktext[ZZLEXBUFSIZE]; \
+ zzlextext = zztoktext; zzrdstream( f ); zzPrimeLookAhead;}
+#ifdef ZZINF_LOOK
+#define zzleaveANTLR(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line);
+#define zzleaveANTLRf(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line);
+#define zzleaveANTLRs(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line);
+#else
+#define zzleaveANTLR(f)
+#define zzleaveANTLRf(f)
+#define zzleaveANTLRs(f)
+#endif
+
+#endif
+
+#define ANTLR(st, f) zzbufsize = ZZLEXBUFSIZE; \
+ zzenterANTLR(f); \
+ st; ++zzasp; \
+ zzleaveANTLR(f);
+
+#define ANTLRm(st, f, _m) zzbufsize = ZZLEXBUFSIZE; \
+ zzmode(_m); \
+ zzenterANTLR(f); \
+ st; ++zzasp; \
+ zzleaveANTLR(f);
+
+#define ANTLRf(st, f) zzbufsize = ZZLEXBUFSIZE; \
+ zzenterANTLRf(f); \
+ st; ++zzasp; \
+ zzleaveANTLRf(f);
+
+#define ANTLRs(st, s) zzbufsize = ZZLEXBUFSIZE; \
+ zzenterANTLRs(s); \
+ st; ++zzasp; \
+ zzleaveANTLRs(s);
+
+#ifdef LL_K
+#define zztext (&(zztextLA[zzlap][0]))
+#else
+#define zztext zzlextext
+#endif
+
+
+ /* A r g u m e n t A c c e s s */
+
+#define zzaCur (zzaStack[zzasp])
+#define zzaRet (*zzaRetPtr)
+#define zzaArg(v,n) zzaStack[v-n]
+#define zzMakeAttr { zzNON_GUESS_MODE {zzOvfChk; --zzasp; zzcr_attr(&(zzaStack[zzasp]),LA(1),LATEXT(1));}}
+#ifdef zzdef0
+#define zzMake0 { zzOvfChk; --zzasp; zzdef0(&(zzaStack[zzasp]));}
+#else
+#define zzMake0 { zzOvfChk; --zzasp;}
+#endif
+#define zzaPush(_v) { zzOvfChk; zzaStack[--zzasp] = _v;}
+#ifndef zzd_attr
+#define zzREL(t) zzasp=(t); /* Restore state of stack */
+#else
+#define zzREL(t) for (; zzasp<(t); zzasp++) \
+ { zzd_attr(&(zzaStack[zzasp])); }
+#endif
+
+#define zzsetmatch(_es) \
+ if ( !_zzsetmatch(_es, &zzBadText, &zzMissText, &zzMissTok, &zzBadTok, &zzMissSet) ) goto fail;
+#define zzsetmatch_wsig(_es, handler) \
+ if ( !_zzsetmatch_wsig(_es) ) {_signal=MismatchedToken; goto handler;}
+
+extern int _zzsetmatch(SetWordType *, char **, char **, int *, int *, SetWordType **);
+extern int _zzsetmatch_wsig(SetWordType *);
+
+#define zzmatch(_t) \
+ if ( !_zzmatch(_t, &zzBadText, &zzMissText, &zzMissTok, &zzBadTok, &zzMissSet) ) goto fail;
+#define zzmatch_wsig(_t,handler) \
+ if ( !_zzmatch_wsig(_t) ) {_signal=MismatchedToken; goto handler;}
+
+extern int _zzmatch(int, const char **, const char **, int *, int *, SetWordType **);
+extern int _zzmatch_wsig(int);
+
+#define zzmatch_wdfltsig(_t,_f) \
+ if ( !_zzmatch_wdfltsig(_t,_f) ) _signal=MismatchedToken;
+#define zzsetmatch_wdfltsig(tw,tt,wf) \
+ if ( !_zzsetmatch_wdfltsig(tw,tt,wf) ) _signal=MismatchedToken;
+
+extern int _zzmatch_wdfltsig(int, SetWordType *);
+extern int _zzsetmatch_wdfltsig(SetWordType *tokensWanted,
+ int tokenTypeOfSet,
+ SetWordType *whatFollows);
+
+#ifdef GENAST
+#define zzRULE Attrib *zzaRetPtr = &(zzaStack[zzasp-1]); \
+ SetWordType *zzMissSet=NULL; int zzMissTok=0; \
+ int zzBadTok=0; const char *zzBadText=""; \
+ int zzErrk=1; \
+ const char *zzMissText=""; zzASTVars
+#else
+#define zzRULE Attrib *zzaRetPtr = &(zzaStack[zzasp-1]); \
+ int zzBadTok=0; const char *zzBadText=""; \
+ int zzErrk=1; \
+ SetWordType *zzMissSet=NULL; int zzMissTok=0; const char *zzMissText=""
+#endif
+
+#ifdef GENAST
+#define zzBLOCK(i) int i = zzasp - 1; int zztsp = zzast_sp
+#define zzEXIT(i) zzREL(i); zzastREL; zzNON_GUESS_MODE { zzastPush(*_root); }
+#define zzLOOP(i) zzREL(i); zzastREL
+#else
+#define zzBLOCK(i) int i = zzasp - 1
+#define zzEXIT(i) zzREL(i)
+#define zzLOOP(i) zzREL(i)
+#endif
+
+#ifdef LL_K
+
+#ifdef DEMAND_LOOK
+#define LOOK(_k) {int i,stop=_k-(LL_K-zzdirty); for (i=1; i<=stop; i++) \
+ zzCONSUME;}
+#define zzCONSUME {zzgettok(); zzdirty--; \
+ zzlap = (zzlap+1)&(LL_K-1); \
+ zzlextext = &(zztextLA[zzlap][0]);}
+#else
+#ifdef ZZINF_LOOK
+#define zzCONSUME {inf_zzgettok; \
+ zzlap = (zzlap+1)&(LL_K-1); \
+ zzlextext = &(zztextLA[zzlap][0]); \
+ }
+#else
+#define zzCONSUME {zzgettok(); \
+ zzlap = (zzlap+1)&(LL_K-1); \
+ zzlextext = &(zztextLA[zzlap][0]);}
+#endif /* ZZINF_LOOK */
+#endif /* DEMAND_LOOK */
+
+#else /* LL_K */
+
+#ifdef DEMAND_LOOK
+#define LOOK(_k) if ( zzdirty) zzCONSUME;
+#ifdef ZZINF_LOOK
+#define zzCONSUME inf_zzgettok; zzdirty=0;
+#else
+#define zzCONSUME zzgettok(); zzdirty=0;
+#endif /* ZZINF_LOOK */
+
+#else /* DEMAND_LOOK */
+
+#ifdef ZZINF_LOOK
+#define zzCONSUME inf_zzgettok
+#else
+#define zzCONSUME zzgettok();
+#endif
+
+#endif /* DEMAND_LOOK */
+
+#endif /* LL_K */
+
+#ifdef LL_K
+#define NLA zztokenLA[zzlap&(LL_K-1)] /* --> next LA */
+#define NLATEXT zztextLA[zzlap&(LL_K-1)] /* --> next text of LA */
+#ifdef DEMAND_LOOK
+#define LA(i) zztokenLA[(zzlabase+(i)-1)&(LL_K-1)]
+#define LATEXT(i) (&(zztextLA[(zzlabase+(i)-1)&(LL_K-1)][0]))
+#else
+#define LA(i) zztokenLA[(zzlap+(i)-1)&(LL_K-1)]
+#define LATEXT(i) (&(zztextLA[(zzlap+(i)-1)&(LL_K-1)][0]))
+#endif
+#else
+#define NLA zztoken
+#define NLATEXT zztext
+#define LA(i) zztoken
+#define LATEXT(i) zztext
+#endif
+
+
+ /* S t a n d a r d S i g n a l s */
+
+#define NoSignal 0
+#define MismatchedToken 1
+#define NoViableAlt 2
+#define NoSemViableAlt 3
+
+
+ /* F u n c t i o n T r a c i n g */
+
+#ifndef zzTRACEIN
+#define zzTRACEIN(r) fprintf(stderr, "enter rule \"%s\"\n", r);
+#endif
+#ifndef zzTRACEOUT
+#define zzTRACEOUT(r) fprintf(stderr, "exit rule \"%s\"\n", r);
+#endif
+
+#ifdef ZZWCHAR_T
+#define zzchar_t unsigned wchar_t
+#else
+#define zzchar_t unsigned char
+#endif
+
+ /* E x t e r n D e f s */
+
+extern Attrib zzempty_attr(void);
+extern Attrib zzconstr_attr(int, char *);
+extern void zzsyn(const char *, int, char *, SetWordType *, int, int, const char *);
+extern int zzset_el(unsigned, SetWordType *);
+extern int zzset_deg(SetWordType *);
+extern void zzedecode(SetWordType *);
+extern void zzFAIL(int k, ...);
+extern void zzresynch(SetWordType *, SetWordType);
+extern void zzsave_antlr_state(zzantlr_state *);
+extern void zzrestore_antlr_state(zzantlr_state *);
+extern void zzfill_inf_look(void);
+#ifdef EXCEPTION_HANDLING
+extern void zzdflthandlers(int, int *);
+#endif
+
+ /* G l o b a l V a r i a b l e s */
+
+/* Define a parser; user should do a "#parser myname" in their grammar file */
+/*extern struct pccts_parser zzparser;*/
+
+extern const char *zztokens[];
+#ifdef LL_K
+extern int zztokenLA[];
+extern char zztextLA[][ZZLEXBUFSIZE];
+extern int zzlap;
+extern int zzlabase;
+#else
+extern int zztoken;
+#endif
+
+extern char zzStackOvfMsg[];
+extern int zzasp;
+extern Attrib zzaStack[];
+#ifdef ZZINF_LOOK
+extern int *zzinf_tokens;
+extern char **zzinf_text;
+extern char *zzinf_text_buffer;
+extern int *zzinf_line;
+extern int zzinf_labase;
+extern int zzinf_last;
+#endif
+#ifdef DEMAND_LOOK
+extern int zzdirty;
+#endif
+#ifdef ZZCAN_GUESS
+extern int zzguessing;
+extern zzjmp_buf zzguess_start;
+#endif
+
+/* Define global variables that refer to values exported by the scanner.
+ * These declarations duplicate those in dlgdef.h, but are needed
+ * if ANTLR is not to generate a .dlg file (-gx); PS, this is a hack.
+ */
+extern zzchar_t *zzlextext; /* text of most recently matched token */
+extern int zzbufsize; /* how long zzlextext is */
+
+#endif
diff --git a/src/translators/btparse/ast.c b/src/translators/btparse/ast.c
new file mode 100644
index 0000000..d433f79
--- /dev/null
+++ b/src/translators/btparse/ast.c
@@ -0,0 +1,227 @@
+/* Abstract syntax tree manipulation functions
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "ast.h"
+#include "attrib.h"
+#include "antlr.h"
+
+/* Bring the caller's tree-building variables (_root, _sibling, _tail)
+ * up to date after a rule reference has left its result in *_sibling.
+ * Nothing happens when *_sibling is NULL.
+ */
+void
+zzlink(AST **_root, AST **_sibling, AST **_tail)
+{
+    AST *head = *_sibling;
+
+    if ( head == NULL ) return;
+
+    if ( *_root == NULL )
+    {
+        *_root = head;
+    }
+    else if ( *_root != head )
+    {
+        (*_root)->down = head;
+    }
+
+    if ( *_tail == NULL ) *_tail = head;
+
+    /* walk the tail forward to the last node of the sibling chain */
+    for ( ; (*_tail)->right != NULL; *_tail = (*_tail)->right ) {;}
+}
+
+/* Allocate one zero-filled AST node.  When the allocation fails a
+ * message is written to stderr and NULL is returned.
+ */
+AST *
+zzastnew(void)
+{
+    AST *node = (AST *) calloc(1, sizeof *node);
+
+    if ( node == NULL )
+    {
+        fprintf(stderr,"%s(%d): cannot allocate AST node\n",__FILE__,__LINE__);
+    }
+    return node;
+}
+
+/* Add a child node to the current sibling list.
+ *
+ * A fresh node is obtained from zzastnew(), handed to the zzcr_ast hook
+ * together with the current attribute and lookahead token, recorded on
+ * the AST stack with zzastPush, and then linked in as the new last
+ * sibling.  The whole body sits under zzNON_GUESS_MODE, so whatever
+ * condition that macro expands to guards all of it.
+ *
+ * _root, _sibling, _tail: the caller's tree-building variables; any of
+ * the three may be updated.
+ *
+ * NOTE(review): the result of zzastnew() is used without a NULL check
+ * although zzastnew() can return NULL -- confirm whether that matters.
+ */
+void
+zzsubchild(AST **_root, AST **_sibling, AST **_tail)
+{
+ AST *n;
+ zzNON_GUESS_MODE {
+ n = zzastnew();
+#ifdef DEMAND_LOOK
+ zzcr_ast(n, &(zzaCur), LA(0), LATEXT(0));
+#else
+ zzcr_ast(n, &(zzaCur), LA(1), LATEXT(1));
+#endif
+ zzastPush( n );
+ if ( *_tail != NULL ) (*_tail)->right = n; /* append after the current last sibling */
+ else {
+ *_sibling = n; /* no tail yet: n starts the sibling list */
+ if ( *_root != NULL ) (*_root)->down = *_sibling; /* and becomes the first child of an existing root */
+ }
+ *_tail = n;
+ if ( *_root == NULL ) *_root = *_sibling; /* no root yet: the list head stands in for it */
+ }
+}
+
+/* make a new AST node. Make the newly-created
+ * node the root for the current sibling list. If a root node already
+ * exists, make the newly-created node the root of the current root.
+ *
+ * The node comes from zzastnew(), is passed to the zzcr_ast hook with
+ * the current attribute and lookahead token, and is pushed with
+ * zzastPush before it is stored in *_root.  The whole body is guarded
+ * by zzNON_GUESS_MODE.
+ *
+ * NOTE(review): the result of zzastnew() is used without a NULL check
+ * although zzastnew() can return NULL -- confirm whether that matters.
+ */
+void
+zzsubroot(AST **_root, AST **_sibling, AST **_tail)
+{
+ AST *n;
+ zzNON_GUESS_MODE {
+ n = zzastnew();
+#ifdef DEMAND_LOOK
+ zzcr_ast(n, &(zzaCur), LA(0), LATEXT(0));
+#else
+ zzcr_ast(n, &(zzaCur), LA(1), LATEXT(1));
+#endif
+ zzastPush( n );
+ if ( *_root != NULL ) /* nested if: the next line runs only when a root already exists */
+ if ( (*_root)->down == *_sibling ) *_sibling = *_tail = *_root; /* old root owns the current list: it becomes the only sibling */
+ *_root = n;
+ (*_root)->down = *_sibling; /* the sibling list hangs below the new root */
+ }
+}
+
+/* Preorder walk over a tree and all of its right siblings.
+ *
+ * For every node: 'before' is called first if the node has children,
+ * then 'func' on the node itself, then the children are walked, and
+ * finally 'after' is called if the node has children.
+ *
+ * Example: print a tree in child-sibling LISP format (AST has a token
+ * field):
+ *
+ * void show(tree)
+ * AST *tree;
+ * {
+ * if ( tree == NULL ) return;
+ * printf(" %s", zztokens[tree->token]);
+ * }
+ *
+ * void before() { printf(" ("); }
+ * void after() { printf(" )"); }
+ *
+ * LISPdump() { zzpre_ast(tree, show, before, after); }
+ *
+ */
+void
+zzpre_ast(
+    AST *tree,
+    void (*func)(AST *),   /* called on every node */
+    void (*before)(AST *), /* called on a subtree root before its subtree is walked */
+    void (*after)(AST *))  /* called on a subtree root after its subtree is walked */
+{
+    AST *node;
+
+    for ( node = tree; node != NULL; node = node->right )
+    {
+        if ( node->down != NULL )
+        {
+            (*before)(node);
+        }
+        (*func)(node);
+        zzpre_ast(node->down, func, before, after);
+        /* tested again on purpose: the callbacks have run in between */
+        if ( node->down != NULL )
+        {
+            (*after)(node);
+        }
+    }
+}
+
+/* Free every AST node reachable from 'tree' through 'down' and 'right'.
+ * Each node is released with zztfree().  A NULL tree is accepted.
+ *
+ * Siblings are handled by a loop, so recursion depth follows the nesting
+ * of 'down' links only and does not grow with the length of a sibling
+ * list.  A node's subtree is freed before the node; the node itself is
+ * freed before its right siblings.
+ */
+void
+zzfree_ast(AST *tree)
+{
+    AST *next;
+
+    while ( tree != NULL )
+    {
+        next = tree->right;         /* fetch before the node is released */
+        zzfree_ast( tree->down );
+        zztfree( tree );
+        tree = next;
+    }
+}
+
+/* build a tree (root child1 child2 ... NULL)
+ * If root is NULL, simply make the children siblings and return ptr
+ * to 1st sibling (child1). If root is not single node, return NULL.
+ *
+ * Siblings that are actually sibling lists themselves are handled
+ * correctly. For example #( NULL, #( NULL, A, B, C), D) results
+ * in the tree ( NULL A B C D ).
+ *
+ * Requires at least two parameters with the last one being NULL. If
+ * both are NULL, return NULL.
+ *
+ * rt:      root node, or NULL to build a plain sibling list.
+ * ...:     AST * children, terminated by a NULL pointer.
+ * returns: rt with the children attached below it, the first child when
+ *          rt is NULL, or NULL when rt already has children.
+ */
+AST *zztmake(AST *rt, ...)
+{
+    va_list ap;
+    AST *child, *sibling=NULL, *tail=NULL, *w;
+    AST *root = rt;
+
+    /* Reject a root that already has children before va_start, so that
+     * no path leaves the function between va_start and va_end.
+     */
+    if ( root != NULL && root->down != NULL ) return NULL;
+
+    va_start(ap, rt);
+    child = va_arg(ap, AST *);
+    while ( child != NULL )
+    {
+        for (w=child; w->right!=NULL; w=w->right) {;} /* find end of child */
+        if ( sibling == NULL ) {sibling = child; tail = w;}
+        else {tail->right = child; tail = w;}
+        child = va_arg(ap, AST *);
+    }
+    va_end(ap);
+
+    if ( root==NULL ) root = sibling;
+    else root->down = sibling;
+    return root;
+}
+
+/* tree duplicate
+ *
+ * Returns a copy of 't' together with everything reachable from it
+ * through 'right' and 'down'.  Each node is copied by structure
+ * assignment, so any pointers held in user-defined AST fields are
+ * shared with the original, not duplicated.
+ *
+ * Returns NULL when 't' is NULL or when the node for 't' cannot be
+ * allocated.  If an allocation fails further down, the corresponding
+ * 'right' or 'down' link of the copy is NULL and the copy is incomplete.
+ */
+AST *
+zzdup_ast(AST *t)
+{
+    AST *u;
+
+    if ( t == NULL ) return NULL;
+    u = zzastnew();
+    if ( u == NULL ) return NULL;   /* zzastnew() has already printed a message */
+    *u = *t;
+#ifdef zzAST_DOUBLE
+    u->up = NULL; /* set by calling invocation */
+    u->left = NULL;
+#endif
+    u->right = zzdup_ast(t->right);
+    u->down = zzdup_ast(t->down);
+#ifdef zzAST_DOUBLE
+    if ( u->right!=NULL ) u->right->left = u;
+    if ( u->down!=NULL ) u->down->up = u;
+#endif
+    return u;
+}
+
+/* Release a single AST node (children and siblings are not touched).
+ * When the zzd_ast macro is defined it is applied to the node first.
+ */
+void
+zztfree(AST *t)
+{
+#ifdef zzd_ast
+ zzd_ast( t );
+#endif
+ free( t );
+}
+
+#ifdef zzAST_DOUBLE
+/*
+ * Fill in the 'left' and 'up' pointers of every node reachable from 't'.
+ * Start with zzdouble_link(your_tree, NULL, NULL).
+ *
+ * left: value stored in the 'left' pointer of 't' itself.
+ * up:   value stored in the 'up' pointer of 't' and of all its right
+ *       siblings.
+ */
+void
+zzdouble_link(AST *t, AST *left, AST *up)
+{
+    AST *prev = left;
+    AST *cur;
+
+    for ( cur = t; cur != NULL; prev = cur, cur = cur->right )
+    {
+        cur->left = prev;
+        cur->up = up;
+        /* children start a new sibling list whose parent is cur */
+        zzdouble_link(cur->down, NULL, cur);
+    }
+}
+#endif
diff --git a/src/translators/btparse/ast.h b/src/translators/btparse/ast.h
new file mode 100644
index 0000000..59622ec
--- /dev/null
+++ b/src/translators/btparse/ast.h
@@ -0,0 +1,99 @@
+/* Abstract syntax tree
+ *
+ * Macros, definitions
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+
+#ifndef ZZAST_H
+#define ZZAST_H
+
+#define zzastOvfChk \
+ if ( zzast_sp <= 0 ) \
+ { \
+ fprintf(stderr, zzStackOvfMsg, __FILE__, __LINE__); \
+ exit(PCCTS_EXIT_FAILURE); \
+ }
+
+#ifndef USER_DEFINED_AST
+#ifndef AST_FIELDS
+#define AST_FIELDS
+#endif
+
+typedef struct _ast {
+ struct _ast *right, *down;
+#ifdef zzAST_DOUBLE
+ struct _ast *left, *up;
+#endif
+ AST_FIELDS
+} AST;
+
+#else
+
+#ifdef zzAST_DOUBLE
+#define AST_REQUIRED_FIELDS struct _ast *right, *down, *left, *up;
+#else
+#define AST_REQUIRED_FIELDS struct _ast *right, *down;
+#endif
+
+#endif
+
+
+/* N o d e a c c e s s m a c r o s
+ *
+ * zzchild(t) yields t's 'down' pointer and zzsibling(t) its 'right'
+ * pointer; both yield NULL when t is NULL.  The argument is evaluated
+ * twice, so it must not have side effects.
+ */
+#define zzchild(t) (((t)==NULL)?NULL:((t)->down))
+#define zzsibling(t) (((t)==NULL)?NULL:((t)->right))
+
+
+/* define global variables needed by #i stack */
+#define zzASTgvars \
+ AST *zzastStack[ZZAST_STACKSIZE]; \
+ int zzast_sp = ZZAST_STACKSIZE;
+
+#define zzASTVars AST *_ast = NULL, *_sibling = NULL, *_tail = NULL
+#define zzSTR ( (_tail==NULL)?(&_sibling):(&(_tail->right)) )
+#define zzastCur (zzastStack[zzast_sp])
+/* zzastArg(i): the AST stack slot i positions below zztsp. */
+#define zzastArg(i) (zzastStack[zztsp-(i)])
+/* zzastPush(p): run the overflow check, then store p in the next lower
+ * stack slot.  The braces keep both steps together when the macro is
+ * the body of an unbraced if/for/while.
+ */
+#define zzastPush(p) { zzastOvfChk; zzastStack[--zzast_sp] = (p); }
+#define zzastDPush --zzast_sp
+#define zzastMARK zztsp=zzast_sp; /* Save state of stack */
+#define zzastREL zzast_sp=zztsp; /* Return state of stack */
+#define zzrm_ast {zzfree_ast(*_root); _tail = _sibling = (*_root)=NULL;}
+
+extern int zzast_sp;
+extern AST *zzastStack[];
+
+void zzlink(AST **, AST **, AST **);
+void zzsubchild(AST **, AST **, AST **);
+void zzsubroot(AST **, AST **, AST **);
+void zzpre_ast(AST *, void (*)(), void (*)(), void (*)());
+void zzfree_ast(AST *);
+AST *zztmake(AST *, ...);
+AST *zzdup_ast(AST *);
+void zztfree(AST *);
+void zzdouble_link(AST *, AST *, AST *);
+AST *zzastnew(void);
+#endif
diff --git a/src/translators/btparse/attrib.h b/src/translators/btparse/attrib.h
new file mode 100644
index 0000000..6a3cecf
--- /dev/null
+++ b/src/translators/btparse/attrib.h
@@ -0,0 +1,35 @@
+/* ------------------------------------------------------------------------
+@NAME : attrib.h
+@DESCRIPTION: Definition of the Attrib type needed by the PCCTS-
+ generated parser.
+@CREATED : Summer 1996, Greg Ward
+@MODIFIED :
+@VERSION : $Id: attrib.h,v 1.3 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+#ifndef ATTRIB_H
+#define ATTRIB_H
+
+/*
+ * Defining Attrib this way (as opposed to making it a pointer to a struct)
+ * avoids the expense of allocating/deallocating a structure for each token;
+ * this way, PCCTS statically allocates the whole stack once and that's
+ * it. (Of course, the stack is four times bigger than it would have been
+ * otherwise.)
+ */
+
+typedef struct {
+ int line;
+ int offset;
+ int token;
+ char *text;
+} Attrib;
+
+#endif /* ATTRIB_H */
diff --git a/src/translators/btparse/bibtex.c b/src/translators/btparse/bibtex.c
new file mode 100644
index 0000000..c922803
--- /dev/null
+++ b/src/translators/btparse/bibtex.c
@@ -0,0 +1,312 @@
+/*
+ * A n t l r T r a n s l a t i o n H e a d e r
+ *
+ * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994
+ * Purdue University Electrical Engineering
+ * With AHPCRC, University of Minnesota
+ * ANTLR Version 1.33
+ */
+#include <stdio.h>
+#define ANTLR_VERSION 133
+
+#define ZZCOL
+#define USER_ZZSYN
+
+#include "btconfig.h"
+#include "btparse.h"
+#include "attrib.h"
+#include "lex_auxiliary.h"
+#include "error.h"
+#include "parse_auxiliary.h"
+/*#include "my_dmalloc.h"*/
+
+extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */
+#define GENAST
+
+#include "ast.h"
+
+#define zzSET_SIZE 4
+#include "antlr.h"
+#include "tokens.h"
+#include "dlgdef.h"
+#include "mode.h"
+#ifndef PURIFY
+#define PURIFY(r,s)
+#endif
+#include "ast.c"
+zzASTgvars
+
+ANTLR_INFO
+
+void /* rule bibfile: ( entry )* -- gathers every entry into one ->right chain starting at *_root */
+bibfile(AST**_root)
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ AST *last; (*_root) = NULL; /* last is only read in the else-branch below, which needs *_root != NULL */
+ {
+ zzBLOCK(zztasp2);
+ zzMake0;
+ {
+ while ( (LA(1)==AT) ) { /* one pass per entry: keep going while the lookahead token is AT */
+ _ast = NULL; entry(&_ast);
+ /* a little creative forestry: first entry becomes *_root, later ones hang off the previous ->right */
+ if ((*_root) == NULL)
+ (*_root) = zzastArg(1);
+ else
+ last->right = zzastArg(1);
+ last = zzastArg(1); /* NOTE(review): if zzastArg(1) can be NULL once *_root is set, the next pass dereferences NULL -- confirm */
+ zzLOOP(zztasp2);
+ }
+ zzEXIT(zztasp2);
+ }
+ }
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd1, 0x1);
+ }
+}
+
+void /* rule entry: AT NAME body -- the NAME node is tagged BTAST_ENTRY and carries the entry's metatype */
+entry(AST**_root)
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ bt_metatype metatype;
+ zzmatch(AT); zzCONSUME;
+ zzmatch(NAME); zzsubroot(_root, &_sibling, &_tail);
+
+ metatype = entry_metatype(); /* defined outside this file; result is stored on the node and passed to body() */
+ zzastArg(1)->nodetype = BTAST_ENTRY;
+ zzastArg(1)->metatype = metatype;
+ zzCONSUME;
+
+ body(zzSTR, metatype ); zzlink(_root, &_sibling, &_tail);
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd1, 0x2);
+ }
+}
+
+void /* rule body[metatype]: STRING | ENTRY_OPEN contents ENTRY_CLOSE, chosen by the lookahead token */
+body(AST**_root, bt_metatype metatype )
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ if ( (LA(1)==STRING) ) {
+ if (!(metatype == BTE_COMMENT )) {zzfailed_pred(" metatype == BTE_COMMENT ");} /* a bare STRING body on a non-comment entry triggers zzfailed_pred */
+ zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail);
+ zzastArg(1)->nodetype = BTAST_STRING;
+ zzCONSUME;
+
+ }
+ else {
+ if ( (LA(1)==ENTRY_OPEN) ) {
+ zzmatch(ENTRY_OPEN); zzCONSUME;
+ contents(zzSTR, metatype ); zzlink(_root, &_sibling, &_tail);
+ zzmatch(ENTRY_CLOSE); zzCONSUME;
+ }
+ else {zzFAIL(1,zzerr1,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} /* neither STRING nor ENTRY_OPEN */
+ }
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd1, 0x4);
+ }
+}
+
+void /* rule contents[metatype]: picks one of three alternatives from the lookahead set bits and the metatype */
+contents(AST**_root, bt_metatype metatype )
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ if ( (setwd1[LA(1)]&0x8)&&(metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ ) ) { /* regular entry: key COMMA fields */
+ if (!(metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ )) {zzfailed_pred(" metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ ");}
+ {
+ zzBLOCK(zztasp2);
+ zzMake0;
+ {
+ if ( (LA(1)==NAME) ) { /* the key may be a NAME ... */
+ zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail); zzCONSUME;
+ }
+ else {
+ if ( (LA(1)==NUMBER) ) { /* ... or a NUMBER */
+ zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail); zzCONSUME;
+ }
+ else {zzFAIL(1,zzerr2,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;}
+ }
+ zzEXIT(zztasp2);
+ }
+ }
+ zzastArg(1)->nodetype = BTAST_KEY;
+ zzmatch(COMMA); zzCONSUME;
+ fields(zzSTR); zzlink(_root, &_sibling, &_tail);
+ }
+ else {
+ if ( (setwd1[LA(1)]&0x10)&&(metatype == BTE_MACRODEF ) ) { /* macro definition: fields only, no key */
+ if (!(metatype == BTE_MACRODEF )) {zzfailed_pred(" metatype == BTE_MACRODEF ");}
+ fields(zzSTR); zzlink(_root, &_sibling, &_tail);
+ }
+ else {
+ if ( (setwd1[LA(1)]&0x20)&&(metatype == BTE_PREAMBLE ) ) { /* preamble: a single value */
+ if (!(metatype == BTE_PREAMBLE )) {zzfailed_pred(" metatype == BTE_PREAMBLE ");}
+ value(zzSTR); zzlink(_root, &_sibling, &_tail);
+ }
+ else {zzFAIL(1,zzerr3,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} /* no alternative matched */
+ }
+ }
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd1, 0x40);
+ }
+}
+
+void /* rule fields: field ( COMMA fields )? | <empty, when the lookahead is ENTRY_CLOSE> */
+fields(AST**_root)
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ if ( (LA(1)==NAME) ) {
+ field(zzSTR); zzlink(_root, &_sibling, &_tail);
+ {
+ zzBLOCK(zztasp2);
+ zzMake0;
+ {
+ if ( (LA(1)==COMMA) ) {
+ zzmatch(COMMA); zzCONSUME;
+ fields(zzSTR); zzlink(_root, &_sibling, &_tail); /* recursion consumes the rest of the list; a trailing COMMA is allowed */
+ }
+ zzEXIT(zztasp2);
+ }
+ }
+ }
+ else {
+ if ( (LA(1)==ENTRY_CLOSE) ) { /* empty alternative: nothing is consumed */
+ }
+ else {zzFAIL(1,zzerr4,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;}
+ }
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd1, 0x80);
+ }
+}
+
+void /* rule field: NAME EQUALS value -- the NAME node is tagged BTAST_FIELD */
+field(AST**_root)
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ zzmatch(NAME); zzsubroot(_root, &_sibling, &_tail);
+ zzastArg(1)->nodetype = BTAST_FIELD; check_field_name (zzastArg(1)); /* check_field_name is defined outside this file */
+ zzCONSUME;
+
+ zzmatch(EQUALS); zzCONSUME;
+ value(zzSTR); zzlink(_root, &_sibling, &_tail);
+
+#if DEBUG > 1
+ printf ("field: fieldname = %p (%s)\n"
+ " first val = %p (%s)\n",
+ zzastArg(1)->text, zzastArg(1)->text, zzastArg(2)->text, zzastArg(2)->text);
+#endif
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch (note: setwd2 here, not setwd1) */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd2, 0x1);
+ }
+}
+
+void /* rule value: simple_value ( HASH simple_value )* */
+value(AST**_root)
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ simple_value(zzSTR); zzlink(_root, &_sibling, &_tail);
+ {
+ zzBLOCK(zztasp2);
+ zzMake0;
+ {
+ while ( (LA(1)==HASH) ) { /* each HASH token introduces one more simple_value */
+ zzmatch(HASH); zzCONSUME;
+ simple_value(zzSTR); zzlink(_root, &_sibling, &_tail);
+ zzLOOP(zztasp2);
+ }
+ zzEXIT(zztasp2);
+ }
+ }
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd2, 0x2);
+ }
+}
+
+void /* rule simple_value: STRING | NUMBER | NAME -- the matched node's nodetype records which one */
+simple_value(AST**_root)
+{
+ zzRULE;
+ zzBLOCK(zztasp1);
+ zzMake0;
+ {
+ if ( (LA(1)==STRING) ) {
+ zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail);
+ zzastArg(1)->nodetype = BTAST_STRING;
+ zzCONSUME;
+
+ }
+ else {
+ if ( (LA(1)==NUMBER) ) {
+ zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail);
+ zzastArg(1)->nodetype = BTAST_NUMBER;
+ zzCONSUME;
+
+ }
+ else {
+ if ( (LA(1)==NAME) ) {
+ zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail);
+ zzastArg(1)->nodetype = BTAST_MACRO; /* a NAME in value position is tagged as a macro */
+ zzCONSUME;
+
+ }
+ else {zzFAIL(1,zzerr5,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} /* none of the three tokens */
+ }
+ }
+ zzEXIT(zztasp1);
+ return;
+fail: /* error path: calls zzsyn, then zzresynch */
+ zzEXIT(zztasp1);
+ zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText);
+ zzresynch(setwd2, 0x4);
+ }
+}
diff --git a/src/translators/btparse/bibtex_ast.c b/src/translators/btparse/bibtex_ast.c
new file mode 100644
index 0000000..354cefb
--- /dev/null
+++ b/src/translators/btparse/bibtex_ast.c
@@ -0,0 +1,63 @@
+/* ------------------------------------------------------------------------
+@NAME : bibtex_ast.c
+@DESCRIPTION: Data and functions for internal display/manipulation of AST
+ nodes. (Stuff for external consumption, and for processing
+ whole trees, is to be found in traversal.c.)
+@GLOBALS :
+@CREATED : 1997/08/12, Greg Ward
+@MODIFIED :
+@VERSION : $Id: bibtex_ast.c,v 1.6 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include "btparse.h"
+#include "prototypes.h"
+/*#include "my_dmalloc.h"*/
+
+
+const char *nodetype_names[] = /* indexed by bt_nodetype in dump() */
+{ /* order must match the bt_nodetype enum in btparse.h (BTAST_BOGUS ... BTAST_MACRO) */
+ "bogus", "entry", "key", "field", "string", "number", "macro"
+};
+
+
+/* Print the sibling chain starting at `root', one node per line, indented
+ * 2*depth columns; every ->down subtree is printed one level deeper. */
+static void dump (AST *root, int depth)
+{
+ AST *node;
+
+ if (!root) /* empty (sub)tree: report it and stop */
+ {
+ printf ("[empty]\n");
+ return;
+ }
+ for (node = root; node; node = node->right)
+ {
+ /* "%*s" with "" pads to 2*depth spaces before the bracketed type name */
+ printf ("%*s[%s]: ", 2*depth, "", nodetype_names[node->nodetype]);
+ if (node->text)
+ printf ("(%s)\n", node->text);
+ else
+ printf ("(null)\n");
+
+ if (node->down) /* descend into children before moving right */
+ dump (node->down, depth+1);
+ }
+}
+
+
+void dump_ast (char *msg, AST *root) /* print optional `msg' verbatim, then the tree at `root', then a blank line */
+{
+ if (msg != NULL)
+ printf ("%s", msg); /* msg is data, not a format: '%' in it must not be interpreted */
+ dump (root, 0); /* depth 0: top-level nodes are not indented */
+ printf ("\n");
+}
diff --git a/src/translators/btparse/bt_debug.h b/src/translators/btparse/bt_debug.h
new file mode 100644
index 0000000..913ae1a
--- /dev/null
+++ b/src/translators/btparse/bt_debug.h
@@ -0,0 +1,38 @@
+/* ------------------------------------------------------------------------
+@NAME : bt_debug.h
+@DESCRIPTION: Defines various macros needed for compile-time selection
+ of debugging code.
+@GLOBALS :
+@CREATED :
+@MODIFIED :
+@VERSION : $Id: bt_debug.h,v 1.2 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+#ifndef BT_DEBUG_H
+#define BT_DEBUG_H
+
+/*
+ * DEBUG is the debug level -- an integer, defaults to 0.
+ * DBG_ACTION(level,action) runs `action' only when DEBUG >= `level'.
+ * When DEBUG is 0 the macro expands to nothing at all, so `action' is
+ * not even compiled. `level' is parenthesized: any expression is safe.
+ */
+
+#ifndef DEBUG
+# define DEBUG 0
+#endif
+
+#if DEBUG
+# define DBG_ACTION(level,action) if (DEBUG >= (level)) { action; }
+#else
+# define DBG_ACTION(level,action)
+#endif
+
+#endif /* BT_DEBUG_H */
diff --git a/src/translators/btparse/btconfig.h b/src/translators/btparse/btconfig.h
new file mode 100644
index 0000000..7405825
--- /dev/null
+++ b/src/translators/btparse/btconfig.h
@@ -0,0 +1,220 @@
+#ifndef BTPARSE_CONFIG_H
+#define BTPARSE_CONFIG_H
+/*
+ * config.h (for ANTLR, DLG, and SORCERER)
+ *
+ * This is a simple configuration file that doesn't have config stuff
+ * in it, but it's a start.
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * Used by PCCTS 1.33 (SORCERER 1.00B11 and up)
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+
+/* This file knows about the following ``environments''
+ UNIX (default)
+ DOS (use #define PC)
+ MAC (use #define MPW; has a few things for THINK C, Metrowerks)
+ */
+
+/*
+* Define PC32 if in a 32-bit PC environment (e.g. extended DOS or Win32).
+* The macros tested here are defined by Watcom, Microsoft, Borland,
+* and djgpp, respectively, when they are used as 32-bit compilers.
+* Users of these compilers *must* be sure to define PC in their
+* makefiles for this to work correctly.
+*/
+#ifdef PC
+# if (defined(__WATCOM__) || defined(_WIN32) || defined(__WIN32__) || \
+ defined(__GNUC__) || defined(__GNUG__))
+# ifndef PC32
+# define PC32
+# endif
+# endif
+#endif
+
+#ifdef PC
+#define ATOKEN_H "AToken.h"
+#define ATOKPTR_H "ATokPtr.h"
+#define ATOKPTR_C "ATokPtr.cpp"
+#define ATOKENBUFFER_H "ATokBuf.h"
+#define ATOKENBUFFER_C "ATokBuf.cpp"
+#define ATOKENSTREAM_H "ATokStr.h"
+#define APARSER_H "AParser.h"
+#define APARSER_C "AParser.cpp"
+#define ASTBASE_H "ASTBase.h"
+#define ASTBASE_C "ASTBase.cpp"
+#define PCCTSAST_C "PCCTSAST.cpp"
+#define LIST_C "List.cpp"
+#define DLEXERBASE_H "DLexBase.h"
+#define DLEXERBASE_C "DLexBase.cpp"
+#define DLEXER_C "DLexer.cpp"
+#define STREESUPPORT_C "STreeSup.C"
+#else
+#define ATOKEN_H "AToken.h"
+#define ATOKPTR_H "ATokPtr.h"
+#define ATOKPTR_C "ATokPtr.cpp"
+#define ATOKENBUFFER_H "ATokenBuffer.h"
+#define ATOKENBUFFER_C "ATokenBuffer.cpp"
+#define ATOKENSTREAM_H "ATokenStream.h"
+#define APARSER_H "AParser.h"
+#define APARSER_C "AParser.cpp"
+#define ASTBASE_H "ASTBase.h"
+#define ASTBASE_C "ASTBase.cpp"
+#define PCCTSAST_C "PCCTSAST.cpp"
+#define LIST_C "List.cpp"
+#define DLEXERBASE_H "DLexerBase.h"
+#define DLEXERBASE_C "DLexerBase.cpp"
+#define DLEXER_C "DLexer.cpp"
+#define STREESUPPORT_C "STreeSupport.cpp"
+#endif
+
+/* SORCERER Stuff */
+#ifdef PC
+#define STPARSER_H "STreePar.h"
+#define STPARSER_C "STreePar.C"
+#else
+#define STPARSER_H "STreeParser.h"
+#define STPARSER_C "STreeParser.cpp"
+#endif
+
+#ifdef MPW
+#define CPP_FILE_SUFFIX ".cp"
+#define CPP_FILE_SUFFIX_NO_DOT "cp"
+#define OBJ_FILE_SUFFIX ".o"
+#else
+#ifdef PC
+#define CPP_FILE_SUFFIX ".cpp"
+#define CPP_FILE_SUFFIX_NO_DOT "cpp"
+#define OBJ_FILE_SUFFIX ".obj"
+#else
+#define CPP_FILE_SUFFIX ".cpp"
+#define CPP_FILE_SUFFIX_NO_DOT "cpp"
+#define OBJ_FILE_SUFFIX ".o"
+#endif
+#endif
+
+/* User may redefine how line information looks */
+#define LineInfoFormatStr "# %d \"%s\"\n"
+
+#ifdef MPW /* Macintosh Programmer's Workshop */
+#define ErrHdr "File \"%s\"; Line %d #"
+#else
+#define ErrHdr "%s, line %d:"
+#endif
+
+
+/* must assume old K&R cpp here, can't use #if defined(..)... */
+
+#ifdef MPW
+#define TopDirectory ":"
+#define DirectorySymbol ":"
+#define OutputDirectoryOption "Directory where all output files should go (default=\":\")"
+#else
+#ifdef PC
+#define TopDirectory "."
+#define DirectorySymbol "\\"
+#define OutputDirectoryOption "Directory where all output files should go (default=\".\")"
+#else
+#define TopDirectory "."
+#define DirectorySymbol "/"
+#define OutputDirectoryOption "Directory where all output files should go (default=\".\")"
+#endif
+#endif
+
+#ifdef MPW
+
+/* Make sure we have prototypes for all functions under MPW */
+
+#include <string.h>
+#include <stdlib.h>
+#include <CursorCtl.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void fsetfileinfo (char *filename, unsigned long newcreator, unsigned long newtype);
+#ifdef __cplusplus
+}
+#endif
+
+/* File creators for various popular development environments */
+
+#define MAC_FILE_CREATOR 'MPS ' /* MPW Text files */
+#if 0
+#define MAC_FILE_CREATOR 'KAHL' /* THINK C/Symantec C++ Text files */
+#endif
+#if 0
+#define MAC_FILE_CREATOR 'MMCC' /* Metrowerks C/C++ Text files */
+#endif
+
+#endif
+
+#ifdef MPW
+#define DAWDLE SpinCursor(1)
+#else
+#define DAWDLE
+#endif
+
+
+/*
+ * useless definitions of special_inits() and special_fopen_actions()
+ * deleted -- GPW 1997/09/06
+ */
+
+/* Define usable bits for set.c stuff; a "word" here is one `unsigned' */
+#define BytesPerWord sizeof(unsigned)
+#define WORDSIZE (sizeof(unsigned)*8) /* bits per word, taking a byte to be 8 bits */
+#define LogWordSize (WORDSIZE==16?4:5) /* log2(WORDSIZE) for 16- or 32-bit words; any other width also yields 5 */
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifdef VAXC /* under VAXC the status values are reversed: 1 = success, 0 = failure */
+#define PCCTS_EXIT_SUCCESS 1
+#define PCCTS_EXIT_FAILURE 0
+#define zzDIE return 0; /* complete return statement, ';' included: returns the failure value */
+#define zzDONE return 1; /* complete return statement, ';' included: returns the success value */
+
+#else /* !VAXC */
+
+#define PCCTS_EXIT_SUCCESS 0
+#define PCCTS_EXIT_FAILURE 1
+#define zzDIE return 1; /* complete return statement, ';' included: returns the failure value */
+#define zzDONE return 0; /* complete return statement, ';' included: returns the success value */
+
+#endif
+
+#ifdef USER_ZZMODE_STACK /* everything below exists only when the user opts in */
+# ifndef ZZSTACK_MAX_MODE
+# define ZZSTACK_MAX_MODE 32 /* default, used unless defined before this header */
+# endif
+# define ZZMAXSTK (ZZSTACK_MAX_MODE * 2) /* always twice ZZSTACK_MAX_MODE */
+#endif
+
+#endif
diff --git a/src/translators/btparse/btparse.h b/src/translators/btparse/btparse.h
new file mode 100644
index 0000000..841d3ee
--- /dev/null
+++ b/src/translators/btparse/btparse.h
@@ -0,0 +1,378 @@
+/* ------------------------------------------------------------------------
+@NAME : btparse.h
+@DESCRIPTION: Declarations and types for users of the btparse library.
+
+ (Actually, btparse.h is generated from btparse.h.in by
+ the `configure' script, in order to automatically determine
+ the appropriate values of HAVE_USHORT and HAVE_BOOLEAN.)
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/19, Greg Ward
+@MODIFIED :
+@VERSION : $Id: btparse.h.in,v 1.35 1999/12/28 18:23:17 greg Exp $
+@COPYRIGHT : Copyright (c) 1996-97 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+#ifndef BTPARSE_H
+#define BTPARSE_H
+
+#include <sys/types.h> /* probably supplies 'ushort' */
+#include <stdio.h>
+
+#include "config.h" /* not btparse's config.h but Tellico's */
+
+/*
+ * Here we attempt to define HAVE_USHORT if a typedef for `ushort' appears
+ * in <sys/types.h>. The detective work is actually done by the
+ * `configure' script, so if compilation fails because of duplicate
+ * definitions of `ushort', that's a bug in `configure' -- please tell me
+ * about it!
+ */
+
+#ifndef HAVE_USHORT
+# define HAVE_USHORT 0
+#endif
+
+#if ! HAVE_USHORT /* needed for various bitmaps */
+typedef unsigned short ushort;
+#endif
+
+
+/* Likewise for boolean. */
+
+#ifndef HAVE_BOOLEAN
+# define HAVE_BOOLEAN 0
+#endif
+
+#if ! HAVE_BOOLEAN
+typedef int boolean;
+#endif
+
+#ifndef TRUE
+# define TRUE 1
+# define FALSE 0
+#endif
+
+#ifndef HAVE_STRLWR
+# define HAVE_STRLWR 0
+#endif
+
+#ifndef HAVE_STRUPR
+# define HAVE_STRUPR 0
+#endif
+
+
+/* Parsing (and post-processing) options */
+
+#define BTO_CONVERT 1 /* convert numbers to strings? */
+#define BTO_EXPAND 2 /* expand macros? */
+#define BTO_PASTE 4 /* paste substrings together? */
+#define BTO_COLLAPSE 8 /* collapse whitespace? */
+
+#define BTO_NOSTORE 16 /* not described here; the only flag outside BTO_STRINGMASK */
+
+#define BTO_FULL (BTO_CONVERT | BTO_EXPAND | BTO_PASTE | BTO_COLLAPSE) /* all four string options */
+#define BTO_MACRO (BTO_CONVERT | BTO_EXPAND | BTO_PASTE) /* BTO_FULL without BTO_COLLAPSE */
+#define BTO_MINIMAL 0 /* no options set */
+
+#define BTO_STRINGMASK (BTO_CONVERT | BTO_EXPAND | BTO_PASTE | BTO_COLLAPSE) /* same bits as BTO_FULL */
+
+#define BT_VALID_NAMEPARTS "fvlj"
+#define BT_MAX_NAMEPARTS 4
+
+typedef enum
+{
+ BTE_UNKNOWN,
+ BTE_REGULAR, /* bibtex.c contents(): key, COMMA, then fields */
+ BTE_COMMENT, /* bibtex.c body(): the only metatype expected to take a bare STRING */
+ BTE_PREAMBLE, /* bibtex.c contents(): a single value */
+ BTE_MACRODEF /* bibtex.c contents(): fields only; must stay last, see NUM_METATYPES */
+/*
+ BTE_ALIAS,
+ BTE_MODIFY
+*/
+} bt_metatype;
+
+#define NUM_METATYPES ((int) BTE_MACRODEF + 1) /* enumerator count; relies on BTE_MACRODEF being the last one */
+
+typedef enum
+{ /* order must match nodetype_names[] in bibtex_ast.c, which is indexed by these values */
+ BTAST_BOGUS, /* to detect uninitialized nodes */
+ BTAST_ENTRY, /* set by entry() on the NAME that follows AT */
+ BTAST_KEY, /* set by contents() on the NAME or NUMBER before the first COMMA */
+ BTAST_FIELD, /* set by field() on the NAME before EQUALS */
+ BTAST_STRING, /* set by simple_value() and body() on a STRING token */
+ BTAST_NUMBER, /* set by simple_value() on a NUMBER token */
+ BTAST_MACRO /* set by simple_value() on a NAME token */
+} bt_nodetype;
+
+typedef enum
+{
+ BTN_FIRST, BTN_VON, BTN_LAST, BTN_JR, BTN_NONE
+} bt_namepart;
+
+typedef enum
+{
+ BTJ_MAYTIE, /* "discretionary" tie between words */
+ BTJ_SPACE, /* force a space between words */
+ BTJ_FORCETIE, /* force a tie (~ in TeX) */
+ BTJ_NOTHING /* nothing between words */
+} bt_joinmethod;
+
+
+#define USER_DEFINED_AST 1 /* makes the #ifdef around the AST typedef below always true */
+
+#define zzcr_ast(ast,attr,tok,txt) /* fills a new node from a token attribute; tok and txt are unused */ \
+{ \
+ (ast)->filename = InputFilename; /* InputFilename must be declared wherever this expands */ \
+ (ast)->line = (attr)->line; \
+ (ast)->offset = (attr)->offset; \
+ (ast)->text = strdup ((attr)->text); /* NOTE(review): result unchecked; (attr)->text assumed non-NULL */ \
+}
+
+#define zzd_ast(ast) \
+/* printf ("zzd_ast: free'ing ast node with string %p (%s)\n", \
+ (ast)->text, (ast)->text); */ \
+ if ((ast)->text != NULL) free ((ast)->text); /* expands to a bare if-statement that carries its own ';' */
+
+
+#ifdef USER_DEFINED_AST
+typedef struct _ast
+{
+ struct _ast *right, *down; /* dump() follows ->right along one level and ->down to the next level */
+ char * filename; /* set from InputFilename by zzcr_ast; the pointer is shared, not copied */
+ int line; /* from Attrib.line */
+ int offset; /* from Attrib.offset */
+ bt_nodetype nodetype; /* assigned by the parser rules in bibtex.c */
+ bt_metatype metatype; /* assigned by entry() on entry nodes */
+ char * text; /* strdup()ed copy made by zzcr_ast; released by zzd_ast */
+} AST;
+#endif /* USER_DEFINED_AST */
+
+
+typedef struct
+{
+ /*
+ * `string' is the string that has been split; items[0] ...
+ * items[num_items-1] are pointers into `string', or NULL for empty
+ * substrings. Note that `string' is actually a copy of the string
+ * passed in to bt_split_list() with NULs inserted between substrings.
+ */
+
+ char * string;
+ int num_items;
+ char ** items;
+} bt_stringlist;
+
+
+typedef struct
+{
+ bt_stringlist * tokens; /* flat list of all tokens in name */
+ char ** parts[BT_MAX_NAMEPARTS]; /* each elt. is list of pointers */
+ /* into `tokens->string' */
+ int part_len[BT_MAX_NAMEPARTS]; /* length in tokens */
+} bt_name;
+
+
+typedef struct tex_tree_s
+{
+ char * start;
+ int len;
+ struct tex_tree_s
+ * child,
+ * next;
+} bt_tex_tree;
+
+
+typedef struct
+{
+ /* These determine the order (and presence) of parts in the name. */
+ int num_parts;
+ bt_namepart parts[BT_MAX_NAMEPARTS];
+
+ /*
+ * These lists are always in the order of the bt_namepart enum -- *not*
+ * dependent on the particular order of parts the user specified! (This
+ * will make it a bit harder if I ever allow more than one occurrence of
+ * a part in a format; since I don't allow that, I'm not [yet] worried
+ * about it!)
+ */
+ const char * pre_part[BT_MAX_NAMEPARTS]; /* NOTE(review): const here and on post_token, but not on the two between -- confirm intent */
+ char * post_part[BT_MAX_NAMEPARTS];
+ char * pre_token[BT_MAX_NAMEPARTS];
+ const char * post_token[BT_MAX_NAMEPARTS];
+ boolean abbrev[BT_MAX_NAMEPARTS]; /* one flag per name part */
+ bt_joinmethod join_tokens[BT_MAX_NAMEPARTS]; /* one join method per name part */
+ bt_joinmethod join_part[BT_MAX_NAMEPARTS]; /* one join method per name part */
+} bt_name_format;
+
+
+typedef enum
+{
+ BTERR_NOTIFY, /* notification about next action */
+ BTERR_CONTENT, /* warning about the content of a record */
+ BTERR_LEXWARN, /* warning in lexical analysis */
+ BTERR_USAGEWARN, /* warning about library usage */
+ BTERR_LEXERR, /* error in lexical analysis */
+ BTERR_SYNTAX, /* error in parser */
+ BTERR_USAGEERR, /* fatal error in library usage */
+ BTERR_INTERNAL /* my fault */
+} bt_errclass;
+
+typedef enum
+{
+ BTACT_NONE, /* do nothing on error */
+ BTACT_CRASH, /* call exit(1) */
+ BTACT_ABORT /* call abort() */
+} bt_erraction;
+
+typedef struct
+{
+ bt_errclass errclass;
+ char * filename;
+ int line;
+ const char * item_desc;
+ int item;
+ char * message;
+} bt_error;
+
+typedef void (*bt_err_handler) (bt_error *);
+
+
+#if defined(__cplusplus__) || defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Function prototypes */
+
+/*
+ * First, we might need a prototype for strdup() (because the zzcr_ast
+ * macro uses it, and that macro is used in pccts/ast.c -- which I don't
+ * want to modify if I can help it, because it's someone else's code).
+ * This is to accommodate AIX, where including <string.h> apparently doesn't
+ * declare strdup() (reported by Reiner Schlotte
+ * <[email protected]>), and compiling bibtex.c (which
+ * includes pccts/ast.c) crashes because of this (yes, yes, I know it
+ * should just be a warning -- I don't know what's going on there!).
+ *
+ * Unfortunately, this duplicates code in bt_config.h -- I can't include
+ * bt_config.h here, because this header must be freestanding; I don't want
+ * to include bt_config.h in pccts/ast.c, because I don't want to touch the
+ * PCCTS code if I can help it; but I don't want every source file that
+ * uses strdup() to have to include btparse.h. Hence the duplication.
+ * Yuck.
+ */
+#ifndef HAVE_STRDUP_DECL
+# define HAVE_STRDUP_DECL 0
+#endif
+#if !HAVE_STRDUP_DECL
+extern char *strdup (const char *s);
+#endif
+
+
+/* init.c */
+void bt_initialize (void);
+void bt_free_ast (AST *ast);
+void bt_cleanup (void);
+
+/* input.c */
+void bt_set_stringopts (bt_metatype metatype, ushort options);
+AST * bt_parse_entry_s (char * entry_text,
+ char * filename,
+ int line,
+ ushort options,
+ boolean * status);
+AST * bt_parse_entry (FILE * infile,
+ char * filename,
+ ushort options,
+ boolean * status);
+AST * bt_parse_file (char * filename,
+ ushort options,
+ boolean * overall_status);
+
+/* postprocess.c */
+void bt_postprocess_string (char * s, ushort options);
+char * bt_postprocess_value (AST * value, ushort options, boolean replace);
+char * bt_postprocess_field (AST * field, ushort options, boolean replace);
+void bt_postprocess_entry (AST * entry, ushort options);
+
+/* error.c */
+void bt_reset_error_counts (void);
+int bt_get_error_count (bt_errclass errclass);
+int * bt_get_error_counts (int *counts);
+ushort bt_error_status (int *saved_counts);
+
+/* macros.c */
+void bt_add_macro_value (AST *assignment, ushort options);
+void bt_add_macro_text (char * macro, char * text, char * filename, int line);
+void bt_delete_macro (char * macro);
+void bt_delete_all_macros (void);
+int bt_macro_length (char *macro);
+char * bt_macro_text (char * macro, char * filename, int line);
+
+/* traversal.c */
+AST *bt_next_entry (AST *entry_list, AST *prev_entry);
+bt_metatype bt_entry_metatype (AST *entry);
+char *bt_entry_type (AST *entry);
+char *bt_entry_key (AST *entry);
+AST *bt_next_field (AST *entry, AST *prev, char **name);
+AST *bt_next_macro (AST *entry, AST *prev, char **name);
+AST *bt_next_value (AST *head,
+ AST *prev,
+ bt_nodetype *nodetype,
+ char **text);
+char *bt_get_text (AST *node);
+
+/* modify.c */
+void bt_set_text (AST * node, char * new_text);
+void bt_entry_set_key (AST * entry, char * new_key);
+
+/* names.c */
+bt_stringlist * bt_split_list (char * string,
+ char * delim,
+ char * filename,
+ int line,
+ char * description);
+void bt_free_list (bt_stringlist *list);
+bt_name * bt_split_name (char * name,
+ char * filename,
+ int line,
+ int name_num);
+void bt_free_name (bt_name * name);
+
+/* tex_tree.c */
+bt_tex_tree * bt_build_tex_tree (char * string);
+void bt_free_tex_tree (bt_tex_tree **top);
+void bt_dump_tex_tree (bt_tex_tree *node, int depth, FILE *stream);
+char * bt_flatten_tex_tree (bt_tex_tree *top);
+
+/* string_util.c */
+void bt_purify_string (char * string, ushort options);
+void bt_change_case (char transform, char * string, ushort options);
+
+/* format_name.c */
+bt_name_format * bt_create_name_format (char * parts, boolean abbrev_first);
+void bt_free_name_format (bt_name_format * format);
+void bt_set_format_text (bt_name_format * format,
+ bt_namepart part,
+ char * pre_part,
+ char * post_part,
+ char * pre_token,
+ char * post_token);
+void bt_set_format_options (bt_name_format * format,
+ bt_namepart part,
+ boolean abbrev,
+ bt_joinmethod join_tokens,
+ bt_joinmethod join_part);
+char * bt_format_name (bt_name * name, bt_name_format * format);
+
+#if defined(__cplusplus__) || defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* BTPARSE_H */
diff --git a/src/translators/btparse/dlgauto.h b/src/translators/btparse/dlgauto.h
new file mode 100644
index 0000000..efcc3b2
--- /dev/null
+++ b/src/translators/btparse/dlgauto.h
@@ -0,0 +1,408 @@
+/* dlgauto.h automaton
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Will Cohen and Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+
+#ifndef ZZDEFAUTO_H
+#define ZZDEFAUTO_H
+
+/* Scanner state. Most of these are declared extern in dlgdef.h for the parser. */
+zzchar_t *zzlextext; /* text of most recently matched token */
+zzchar_t *zzbegexpr; /* beginning of last reg expr recogn. */
+zzchar_t *zzendexpr; /* end (last character) of last reg expr recogn. */
+int zzbufsize; /* number of characters in zzlextext */
+int zzbegcol = 0; /* column that first character of token is in*/
+int zzendcol = 0; /* column that last character of token is in */
+int zzline = 1; /* line current token is on */
+int zzreal_line=1; /* line of 1st portion of token that is not skipped */
+int zzchar; /* character to determine next state */
+int zzbufovf; /* indicates that buffer too small for text */
+int zzcharfull = 0; /* nonzero when zzchar already holds a character read ahead by zzadvance */
+static zzchar_t *zznextpos;/* points to next available position in zzlextext*/
+static int zzclass; /* ZZSHIFT(zzchar); column used to index the dfa table */
+
+void zzerrstd(const char *);
+void (*zzerr)(const char *)=zzerrstd;/* pointer to error reporting function */
+extern int zzerr_in(void);
+
+static FILE *zzstream_in=0;
+static int (*zzfunc_in)() = zzerr_in;
+static zzchar_t *zzstr_in=0;
+
+#ifdef USER_ZZMODE_STACK
+int zzauto = 0;
+#else
+static int zzauto = 0;
+#endif
+static int zzadd_erase;
+static char zzebuf[70];
+
+#ifdef ZZCOL
+#define ZZINC (++zzendcol)
+#else
+#define ZZINC
+#endif
+
+
+#define ZZGETC_STREAM {zzchar = getc(zzstream_in); zzclass = ZZSHIFT(zzchar);}
+#define ZZGETC_FUNC {zzchar = (*zzfunc_in)(); zzclass = ZZSHIFT(zzchar);}
+#define ZZGETC_STR { \
+ if (*zzstr_in){ \
+ zzchar = *zzstr_in; \
+ ++zzstr_in; \
+ }else{ \
+ zzchar = EOF; \
+ } \
+ zzclass = ZZSHIFT(zzchar); \
+}
+
+#define ZZNEWSTATE (newstate = dfa[state][zzclass])
+
+#ifndef ZZCOPY
+#define ZZCOPY \
+ /* Truncate matching buffer to size (not an error) */ \
+ if (zznextpos < lastpos){ \
+ *(zznextpos++) = zzchar; \
+ }else{ \
+ zzbufovf = 1; \
+ }
+#endif
+
+/* Make the stdio stream f the scanner's input source.
+ * A NULL f is ignored and the current source is left as it is.
+ */
+void
+zzrdstream( FILE *f )
+{
+    if (f == NULL) return;
+
+    /* exactly one of the three sources is left non-null */
+    zzstream_in = f;
+    zzfunc_in = NULL;
+    zzstr_in = 0;
+    /* restart line counting and force a fresh read of the first character */
+    zzline = 1;
+    zzcharfull = 0;
+}
+
+/* Make the callback f the scanner's input source; it is called once per
+ * character and its int result is stored in zzchar.
+ * A NULL f is ignored and the current source is left as it is.
+ */
+void
+zzrdfunc( int (*f)() )
+{
+    if (f == NULL) return;
+
+    /* exactly one of the three sources is left non-null */
+    zzfunc_in = f;
+    zzstream_in = NULL;
+    zzstr_in = 0;
+    /* restart line counting and force a fresh read of the first character */
+    zzline = 1;
+    zzcharfull = 0;
+}
+
+
+/* Make the NUL-terminated string s the scanner's input source.
+ * A NULL s is ignored and the current source is left as it is.
+ */
+void
+zzrdstr( zzchar_t *s )
+{
+    if (!s) return;
+
+    /* exactly one of the three sources is left non-null */
+    zzstr_in = s;
+    zzstream_in = NULL;
+    zzfunc_in = 0;
+    /* restart line counting and force a fresh read of the first character */
+    zzline = 1;
+    zzcharfull = 0;
+}
+
+
+/* Intentionally empty: the scanner does not close the input it was given,
+ * so the caller that opened the stream remains responsible for it.
+ */
+void
+zzclose_stream()
+{
+}
+
+/* saves dlg state, but not what feeds dlg (such as file position)
+ *
+ * Copies the input source pointers, current automaton, lookahead character
+ * and token-buffer bookkeeping into *state. Only the pointers into the
+ * zzlextext buffer are stored, not the buffer contents, and zzreal_line is
+ * not part of the saved state.
+ */
+void
+zzsave_dlg_state(struct zzdlg_state *state)
+{
+ state->stream = zzstream_in;
+ state->func_ptr = zzfunc_in;
+ state->str = zzstr_in;
+ state->auto_num = zzauto;
+ state->add_erase = zzadd_erase;
+ state->lookc = zzchar;
+ state->char_full = zzcharfull;
+ state->begcol = zzbegcol;
+ state->endcol = zzendcol;
+ state->line = zzline;
+ state->lextext = zzlextext;
+ state->begexpr = zzbegexpr;
+ state->endexpr = zzendexpr;
+ state->bufsize = zzbufsize;
+ state->bufovf = zzbufovf;
+ state->nextpos = zznextpos;
+ state->class_num = zzclass;
+}
+
+/* Inverse of zzsave_dlg_state: reinstates every scanner variable recorded in
+ * *state. The buffer pointers are restored as-is, so the zzlextext buffer
+ * they refer to must still be valid.
+ */
+void
+zzrestore_dlg_state(struct zzdlg_state *state)
+{
+ zzstream_in = state->stream;
+ zzfunc_in = state->func_ptr;
+ zzstr_in = state->str;
+ zzauto = state->auto_num;
+ zzadd_erase = state->add_erase;
+ zzchar = state->lookc;
+ zzcharfull = state->char_full;
+ zzbegcol = state->begcol;
+ zzendcol = state->endcol;
+ zzline = state->line;
+ zzlextext = state->lextext;
+ zzbegexpr = state->begexpr;
+ zzendexpr = state->endexpr;
+ zzbufsize = state->bufsize;
+ zzbufovf = state->bufovf;
+ zznextpos = state->nextpos;
+ zzclass = state->class_num;
+}
+
+/* Switch the scanner to automaton (lexical mode) m.
+ * m must lie in [0, MAX_MODE); any other value is reported through zzerr
+ * and the current mode is left unchanged.
+ */
+void
+zzmode( int m )
+{
+    /* zzauto indexes dfa_base in zzgettok, so negative modes must be rejected too */
+    if (m>=0 && m<MAX_MODE){
+        zzauto = m;
+        /* have to redo class since using different compression */
+        zzclass = ZZSHIFT(zzchar);
+    }else{
+        sprintf(zzebuf,"Invalid automaton mode = %d ",m);
+        zzerr(zzebuf);
+    }
+}
+
+/* erase what is currently in the buffer, and get a new reg. expr
+ * (meant to be called from a lexer action: zzgettok sees zzadd_erase == 1
+ * after the action returns and restarts at its "skip" label)
+ */
+void
+zzskip()
+{
+ zzadd_erase = 1;
+}
+
+/* don't erase what is in the zzlextext buffer, add on to it
+ * (meant to be called from a lexer action: zzgettok sees zzadd_erase == 2
+ * after the action returns and continues at its "more" label)
+ */
+void
+zzmore()
+{
+ zzadd_erase = 2;
+}
+
+/* substitute c for the reg. expr last matched and is in the buffer
+ *
+ * The text from zzbegexpr onward becomes the single character c followed by
+ * a terminator.
+ * NOTE(review): when zzbegexpr is already the last slot of zzlextext nothing
+ * is written, yet zznextpos is still advanced to zzbegexpr+1 (one past the
+ * slot reserved for the terminator) and zzbufovf is not set -- confirm that
+ * callers cannot reach this with a full buffer.
+ */
+void
+zzreplchar(zzchar_t c)
+{
+ /* can't allow overwriting null at end of string */
+ if (zzbegexpr < &zzlextext[zzbufsize-1]){
+ *zzbegexpr = c;
+ *(zzbegexpr+1) = '\0';
+ }
+ zzendexpr = zzbegexpr;
+ zznextpos = zzbegexpr + 1;
+}
+
+/* replace the string s for the reg. expr last matched and in the buffer
+ *
+ * Copies s over the text starting at zzbegexpr, truncating it to fit in
+ * zzlextext; zzbufovf is set to 1 when s had to be truncated and to 0
+ * otherwise. A NULL s replaces the matched text with the empty string.
+ */
+void
+zzreplstr(register zzchar_t *s)
+{
+    register zzchar_t *l= &zzlextext[zzbufsize -1];
+
+    zznextpos = zzbegexpr;
+    if (s){
+        while ((zznextpos <= l) && (*(zznextpos++) = *(s++))!=0){
+            /* empty */
+        }
+        /* correct for NULL at end of string */
+        zznextpos--;
+        /* once something was copied, s[-1] is the last element stored:
+         * it is the terminator only if all of s fit */
+        if ((zznextpos <= l) && (*(--s) == 0)){
+            zzbufovf = 0;
+        }else{
+            zzbufovf = 1;
+        }
+    }else{
+        /* nothing to copy, so nothing can have been truncated; the old code
+         * evaluated *(--s) here, dereferencing NULL-1 */
+        zzbufovf = (zznextpos <= l) ? 0 : 1;
+    }
+    *(zznextpos) = '\0';
+    zzendexpr = zznextpos - 1;
+}
+
+/* Scan the next token from the current input source.
+ *
+ * Runs the DFA of the current mode (zzauto) from its start state, copying
+ * each matched character into zzlextext; ZZCOPY drops characters and sets
+ * zzbufovf once the buffer is full. The action registered for the final
+ * state is then called. If that action called zzskip() the text is discarded
+ * and scanning restarts at "skip"; if it called zzmore() scanning continues
+ * at "more" and the new text is appended to what is already in zzlextext.
+ */
+void
+zzgettok()
+{
+ register int state, newstate;
+ /* last space reserved for the null char */
+ zzchar_t *lastpos; /* GPW 1997/09/05 (removed 'register') */
+
+skip:
+ zzreal_line = zzline;
+ zzbufovf = 0;
+ lastpos = &zzlextext[zzbufsize-1];
+ zznextpos = zzlextext;
+ zzbegcol = zzendcol+1;
+more:
+ zzbegexpr = zznextpos;
+#ifdef ZZINTERACTIVE
+ /* interactive version of automaton */
+ /* if there is something in zzchar, process it */
+ state = newstate = dfa_base[zzauto];
+ if (zzcharfull){
+ ZZINC;
+ ZZCOPY;
+ ZZNEWSTATE;
+ }
+ if (zzstr_in)
+ while (zzalternatives[newstate]){
+ state = newstate;
+ ZZGETC_STR;
+ ZZINC;
+ ZZCOPY;
+ ZZNEWSTATE;
+ }
+ else if (zzstream_in)
+ while (zzalternatives[newstate]){
+ state = newstate;
+ ZZGETC_STREAM;
+ ZZINC;
+ ZZCOPY;
+ ZZNEWSTATE;
+ }
+ else if (zzfunc_in)
+ while (zzalternatives[newstate]){
+ state = newstate;
+ ZZGETC_FUNC;
+ ZZINC;
+ ZZCOPY;
+ ZZNEWSTATE;
+ }
+ /* figure out if last character really part of token */
+ if ((state != dfa_base[zzauto]) && (newstate == DfaStates)){
+ zzcharfull = 1;
+ --zznextpos;
+ }else{
+ zzcharfull = 0;
+ state = newstate;
+ }
+ *(zznextpos) = '\0';
+ /* Able to transition out of start state to some non err state?*/
+ if ( state == dfa_base[zzauto] ){
+ /* make sure doesn't get stuck */
+ zzadvance();
+ }
+#else
+ /* non-interactive version of automaton */
+ /* always works with one character of lookahead held in zzchar */
+ if (!zzcharfull)
+ zzadvance();
+ else
+ ZZINC;
+ state = dfa_base[zzauto];
+ /* DfaStates is the "no transition" value: loop until the DFA rejects zzchar */
+ if (zzstr_in)
+ while (ZZNEWSTATE != DfaStates){
+ state = newstate;
+ ZZCOPY;
+ ZZGETC_STR;
+ ZZINC;
+ }
+ else if (zzstream_in)
+ while (ZZNEWSTATE != DfaStates){
+ state = newstate;
+ ZZCOPY;
+ ZZGETC_STREAM;
+ ZZINC;
+ }
+ else if (zzfunc_in)
+ while (ZZNEWSTATE != DfaStates){
+ state = newstate;
+ ZZCOPY;
+ ZZGETC_FUNC;
+ ZZINC;
+ }
+ zzcharfull = 1;
+ /* still in the start state: nothing matched, so take the offending
+ character as the token text and step past it */
+ if ( state == dfa_base[zzauto] ){
+ if (zznextpos < lastpos){
+ *(zznextpos++) = zzchar;
+ }else{
+ zzbufovf = 1;
+ }
+ *zznextpos = '\0';
+ /* make sure doesn't get stuck */
+ zzadvance();
+ }else{
+ *zznextpos = '\0';
+ }
+#endif
+#ifdef ZZCOL
+ zzendcol -= zzcharfull;
+#endif
+ zzendexpr = zznextpos -1;
+ zzadd_erase = 0;
+ (*actions[accepts[state]])();
+ /* the action may have requested a restart through zzskip()/zzmore() */
+ switch (zzadd_erase) {
+ case 1: goto skip;
+ case 2: goto more;
+ }
+}
+
+/* Read one character from the active input source into zzchar (and its
+ * class into zzclass) and mark the lookahead as full. The zzrd* setters
+ * leave only one source non-null, so at most one of the first three
+ * branches is expected to run. With no source selected zzerr_in() reports
+ * the problem; its EOF return value is not used here.
+ */
+void
+zzadvance()
+{
+ if (zzstream_in) { ZZGETC_STREAM; zzcharfull = 1; ZZINC;}
+ if (zzfunc_in) { ZZGETC_FUNC; zzcharfull = 1; ZZINC;}
+ if (zzstr_in) { ZZGETC_STR; zzcharfull = 1; ZZINC;}
+ if (!(zzstream_in || zzfunc_in || zzstr_in)){
+ zzerr_in();
+ }
+}
+
+/* Default lexical error reporter (the initial value of zzerr).
+ * Prints s, or "Lexical error" when s is NULL, followed by the current line
+ * number and the current token text, to stderr.
+ */
+void
+zzerrstd(const char *s)
+{
+ fprintf(stderr,
+ "%s near line %d (text was '%s')\n",
+ ((s == NULL) ? "Lexical error" : s),
+ zzline,zzlextext);
+}
+
+/* Placeholder input function: the initial value of zzfunc_in, and what
+ * zzadvance calls when no input source is set. Complains on stderr and
+ * returns EOF.
+ */
+int
+zzerr_in()
+{
+ fprintf(stderr,"No input stream, function, or string\n");
+ /* return eof to get out gracefully */
+ return EOF;
+}
+
+#endif
diff --git a/src/translators/btparse/dlgdef.h b/src/translators/btparse/dlgdef.h
new file mode 100644
index 0000000..ded2c31
--- /dev/null
+++ b/src/translators/btparse/dlgdef.h
@@ -0,0 +1,97 @@
+/* dlgdef.h
+ * Things in scanner produced by dlg that should be visible to the outside
+ * world
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+
+#ifndef ZZDLGDEF_H
+#define ZZDLGDEF_H
+
+#include "btconfig.h"
+
+#ifndef zzchar_t
+#ifdef ZZWCHAR_T
+#define zzchar_t unsigned wchar_t
+#else
+#define zzchar_t unsigned char
+#endif
+#endif
+
+/* Snapshot of the scanner's variables, filled by zzsave_dlg_state() and
+ * consumed by zzrestore_dlg_state(). Pointer members alias the live
+ * zzlextext buffer; its contents are not copied.
+ */
+struct zzdlg_state {
+ FILE *stream; /* input sources: stream, callback or string */
+ int (*func_ptr)();
+ zzchar_t *str;
+ int auto_num; /* current automaton (zzauto) */
+ int add_erase; /* pending zzskip/zzmore request */
+ int lookc; /* lookahead character (zzchar) */
+ int char_full; /* zzcharfull */
+ int begcol, endcol;
+ int line;
+ zzchar_t *lextext, *begexpr, *endexpr;
+ int bufsize;
+ int bufovf;
+ zzchar_t *nextpos;
+ int class_num; /* zzclass */
+};
+
+extern zzchar_t *zzlextext; /* text of most recently matched token */
+extern zzchar_t *zzbegexpr; /* beginning of last reg expr recogn. */
+extern zzchar_t *zzendexpr; /* end (last character) of last reg expr recogn. */
+extern int zzbufsize; /* how long zzlextext is */
+extern int zzbegcol; /* column that first character of token is in*/
+extern int zzendcol; /* column that last character of token is in */
+extern int zzline; /* line current token is on */
+extern int zzreal_line; /* line of 1st portion of token that is not skipped */
+extern int zzchar; /* character to determine next state */
+extern int zzbufovf; /* indicates that buffer too small for text */
+extern void (*zzerr)(const char *);/* pointer to error reporting function */
+
+#ifdef USER_ZZMODE_STACK
+extern int zzauto;
+#endif
+
+extern void zzadvance(void); /* read the next input character into zzchar */
+extern void zzskip(void); /* erase zzlextext, look for another token */
+extern void zzmore(void); /* keep zzlextext, look for another token */
+extern void zzmode(int k); /* switch to automaton 'k' */
+extern void zzrdstream(FILE *);/* what stream to read from */
+extern void zzclose_stream(void);/* close the current input stream */
+extern void zzrdfunc(int (*)());/* what function to get char from */
+extern void zzrdstr( zzchar_t * );/* what string to read from */
+extern void zzgettok(void); /* get next token */
+extern void zzreplchar(zzchar_t c);/* replace last recognized reg. expr. with
+ a character */
+extern void zzreplstr(zzchar_t *s);/* replace last recognized reg. expr. with
+ a string */
+extern void zzsave_dlg_state(struct zzdlg_state *);
+extern void zzrestore_dlg_state(struct zzdlg_state *);
+extern int zzerr_in(void);
+extern void zzerrstd(const char *);
+extern void zzerraction();
+
+#endif
diff --git a/src/translators/btparse/err.c b/src/translators/btparse/err.c
new file mode 100644
index 0000000..f143048
--- /dev/null
+++ b/src/translators/btparse/err.c
@@ -0,0 +1,75 @@
+/*
+ * A n t l r S e t s / E r r o r F i l e H e a d e r
+ *
+ * Generated from: bibtex.g
+ *
+ * Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995
+ * Parr Research Corporation
+ * with Purdue University Electrical Engineering
+ * With AHPCRC, University of Minnesota
+ * ANTLR Version 1.33
+ */
+
+#include <stdio.h>
+#define ANTLR_VERSION 133
+
+#define ZZCOL
+#define USER_ZZSYN
+
+#include "btconfig.h"
+#include "btparse.h"
+#include "attrib.h"
+#include "lex_auxiliary.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+
+extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */
+#define zzSET_SIZE 4
+#include "antlr.h"
+#include "ast.h"
+#include "tokens.h"
+#include "dlgdef.h"
+#include "err.h"
+
+const ANTLRChar *zztokens[27]={
+ /* 00 */ "Invalid",
+ /* 01 */ "@",
+ /* 02 */ "AT",
+ /* 03 */ "\\n",
+ /* 04 */ "COMMENT",
+ /* 05 */ "[\\ \\r\\t]+",
+ /* 06 */ "~[\\@\\n\\ \\r\\t]+",
+ /* 07 */ "\\n",
+ /* 08 */ "[\\ \\r\\t]+",
+ /* 09 */ "NUMBER",
+ /* 10 */ "NAME",
+ /* 11 */ "LBRACE",
+ /* 12 */ "RBRACE",
+ /* 13 */ "ENTRY_OPEN",
+ /* 14 */ "ENTRY_CLOSE",
+ /* 15 */ "EQUALS",
+ /* 16 */ "HASH",
+ /* 17 */ "COMMA",
+ /* 18 */ "\"",
+ /* 19 */ "\\n~[\\n\\{\\}\\(\\)\"\\]*",
+ /* 20 */ "[\\r\\t]",
+ /* 21 */ "\\{",
+ /* 22 */ "\\}",
+ /* 23 */ "\\(",
+ /* 24 */ "\\)",
+ /* 25 */ "STRING",
+ /* 26 */ "~[\\n\\{\\}\\(\\)\"]+"
+};
+SetWordType zzerr1[4] = {0x0,0x20,0x0,0x2};
+SetWordType zzerr2[4] = {0x0,0x6,0x0,0x0};
+SetWordType zzerr3[4] = {0x0,0x46,0x0,0x2};
+SetWordType zzerr4[4] = {0x0,0x44,0x0,0x0};
+SetWordType setwd1[27] = {0x0,0x7,0x6,0x0,0x0,0x0,0x0,
+ 0x0,0x0,0x28,0x38,0x0,0x0,0x0,0xd0,
+ 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,
+ 0x0,0x0,0x20,0x0};
+SetWordType zzerr5[4] = {0x0,0x6,0x0,0x2};
+SetWordType setwd2[27] = {0x0,0x0,0x0,0x0,0x0,0x0,0x0,
+ 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7,
+ 0x0,0x4,0x7,0x0,0x0,0x0,0x0,0x0,
+ 0x0,0x0,0x0,0x0};
diff --git a/src/translators/btparse/err.h b/src/translators/btparse/err.h
new file mode 100644
index 0000000..d16615d
--- /dev/null
+++ b/src/translators/btparse/err.h
@@ -0,0 +1,700 @@
+/*
+ * err.h
+ *
+ * Standard error handling mechanism
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * Has grown to hold all kinds of stuff (err.h is increasingly misnamed)
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+
+#ifndef ERR_H
+#define ERR_H
+
+#include "btconfig.h"
+
+#include <string.h>
+#include <stdarg.h>
+
+#ifdef DUM
+/* Define usable bits per unsigned int word (used for set stuff) */
+#ifdef PC
+#define BSETWORDSIZE 16
+#define BSETLOGWORDSIZE 4
+#else
+#define BSETWORDSIZE 32
+#define BSETLOGWORDSIZE 5
+#endif
+#endif
+
+#define BSETWORDSIZE 8
+#define BSETLOGWORDSIZE 3 /* SetWordType is 8bits */
+
+#define BSETMODWORD(x) ((x) & (BSETWORDSIZE-1)) /* x % BSETWORDSIZE */
+#define BSETDIVWORD(x) ((x) >> BSETLOGWORDSIZE) /* x / BSETWORDSIZE */
+
+/* This is not put into the global pccts_parser structure because it is
+ * hidden and does not need to be saved during a "save state" operation
+ */
+/* maximum of 32 bits/unsigned int and must be 8 bits/byte */
+static SetWordType bitmask[] = {
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+ 0x00000010, 0x00000020, 0x00000040, 0x00000080
+};
+
+/* Error recovery: skip input until the lookahead token is in the resynch
+ * set (wd[token] & mask is nonzero) or end of input is reached.
+ * The static flag 'consumed' records whether the previous call actually
+ * discarded input; if it did not, this call consumes exactly one token so
+ * that repeated calls at the same position still make progress.
+ */
+void
+zzresynch(SetWordType *wd,SetWordType mask)
+{
+ static int consumed = 1;
+
+ /* if you enter here without having consumed a token from last resynch
+ * force a token consumption.
+ */
+ if ( !consumed ) {zzCONSUME; return;}
+
+ /* if current token is in resynch set, we've got what we wanted */
+ if ( wd[LA(1)]&mask || LA(1) == zzEOF_TOKEN ) {consumed=0; return;}
+
+ /* scan until we find something in the resynch set */
+ while ( !(wd[LA(1)]&mask) && LA(1) != zzEOF_TOKEN ) {zzCONSUME;}
+ consumed=1;
+}
+
+/* Discard tokens until the lookahead token is a member of set st.
+ * Also stops at end of input, as zzresynch does: without that test the loop
+ * could not terminate once input is exhausted and st does not contain the
+ * EOF token (assuming the scanner keeps returning EOF at end of input).
+ */
+void
+zzconsumeUntil(SetWordType *st)
+{
+    while ( !zzset_el(LA(1), st) && LA(1) != zzEOF_TOKEN ) { zzCONSUME; }
+}
+
+/* Discard tokens until the lookahead token is t.
+ * Also stops at end of input, as zzresynch does: without that test the loop
+ * could not terminate when t never appears in the remaining input (assuming
+ * the scanner keeps returning EOF at end of input).
+ */
+void
+zzconsumeUntilToken(int t)
+{
+    while ( LA(1)!=t && LA(1) != zzEOF_TOKEN ) { zzCONSUME; }
+}
+
+/* Work out why a k-token lookahead prediction failed.
+ *
+ * input looks like:
+ *   zzFAIL(k, e1, e2, ..., ek,
+ *          &zzMissSet, &zzMissText, &zzBadTok, &zzBadText, &err_k)
+ * where e1..ek are the k lookahead sets and the trailing pointers receive:
+ *   miss set  - the set the offending token was not a member of, or NULL
+ *               when every token is individually valid at its position
+ *   miss text - the lookahead text examined so far (a static buffer that the
+ *               next call overwrites), or zzlextext in the NULL-set case
+ *   bad tok / bad text - the offending token and its text
+ *   err_k     - 1 when the very first token is the offender, otherwise k
+ * The caller must pass no more sets than the local array f can hold
+ * (LL_K, or 1 when LL_K is not defined).
+ */
+void
+zzFAIL(int k, ...)
+{
+#ifdef LL_K
+    static char text[LL_K*ZZLEXBUFSIZE+1];
+    SetWordType *f[LL_K];
+#else
+    static char text[ZZLEXBUFSIZE+1];
+    SetWordType *f[1];
+#endif
+    SetWordType **miss_set;
+    char **miss_text;
+    int *bad_tok;
+    char **bad_text;
+    int *err_k;
+    int i;
+    va_list ap;
+/* Removed because it shadows a parameter. gcc 3.4 complains.
+   I think removing it preserves the behavior of gcc 3.3 and previous.
+    int k;
+*/
+    va_start(ap, k);
+    text[0] = '\0';
+    for (i=1; i<=k; i++) /* collect all lookahead sets */
+    {
+        f[i-1] = va_arg(ap, SetWordType *);
+    }
+    for (i=1; i<=k; i++) /* look for offending token */
+    {
+        if ( i>1 ) strcat(text, " ");
+        strcat(text, LATEXT(i));
+        if ( !zzset_el((unsigned)LA(i), f[i-1]) ) break;
+    }
+    miss_set = va_arg(ap, SetWordType **);
+    miss_text = va_arg(ap, char **);
+    bad_tok = va_arg(ap, int *);
+    bad_text = va_arg(ap, char **);
+    err_k = va_arg(ap, int *);
+    /* all arguments fetched: va_start must be matched by va_end before
+     * either return below */
+    va_end(ap);
+    if ( i>k )
+    {
+        /* bad; lookahead is permutation that cannot be matched,
+         * but, the ith token of lookahead is valid at the ith position
+         * (The old LL sub 1 (k) versus LL(k) parsing technique)
+         */
+        *miss_set = NULL;
+        *miss_text = zzlextext;
+        *bad_tok = LA(1);
+        *bad_text = LATEXT(1);
+        *err_k = k;
+        return;
+    }
+/* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
+    *miss_set = f[i-1];
+    *miss_text = text;
+    *bad_tok = LA(i);
+    *bad_text = LATEXT(i);
+    if ( i==1 ) *err_k = 1;
+    else *err_k = k;
+}
+
+/* Snapshot the parser's state into *buf. Which members are copied depends
+ * on the compile-time options ZZCAN_GUESS, GENAST, ZZINF_LOOK, DEMAND_LOOK
+ * and LL_K.
+ * NOTE(review): the lookahead text is copied with an unbounded strcpy; this
+ * assumes the text member(s) of zzantlr_state are at least as large as the
+ * lexer buffers -- confirm against the struct definition.
+ */
+void
+zzsave_antlr_state(zzantlr_state *buf)
+{
+#ifdef LL_K
+ int i;
+#endif
+
+#ifdef ZZCAN_GUESS
+ buf->guess_start = zzguess_start;
+ buf->guessing = zzguessing;
+#endif
+ buf->asp = zzasp;
+#ifdef GENAST
+ buf->ast_sp = zzast_sp;
+#endif
+#ifdef ZZINF_LOOK
+ buf->inf_labase = zzinf_labase;
+ buf->inf_last = zzinf_last;
+#endif
+#ifdef DEMAND_LOOK
+ buf->dirty = zzdirty;
+#endif
+#ifdef LL_K
+ for (i=0; i<LL_K; i++) buf->tokenLA[i] = zztokenLA[i];
+ for (i=0; i<LL_K; i++) strcpy(buf->textLA[i], zztextLA[i]);
+ buf->lap = zzlap;
+ buf->labase = zzlabase;
+#else
+ buf->token = zztoken;
+ strcpy(buf->text, zzlextext);
+#endif
+}
+
+/* Inverse of zzsave_antlr_state: reinstates the parser's state from *buf,
+ * under the same compile-time options. The saved lookahead text is copied
+ * back into the live lexer buffers with strcpy.
+ */
+void
+zzrestore_antlr_state(zzantlr_state *buf)
+{
+#ifdef LL_K
+ int i;
+#endif
+
+#ifdef ZZCAN_GUESS
+ zzguess_start = buf->guess_start;
+ zzguessing = buf->guessing;
+#endif
+ zzasp = buf->asp;
+#ifdef GENAST
+ zzast_sp = buf->ast_sp;
+#endif
+#ifdef ZZINF_LOOK
+ zzinf_labase = buf->inf_labase;
+ zzinf_last = buf->inf_last;
+#endif
+#ifdef DEMAND_LOOK
+ zzdirty = buf->dirty;
+#endif
+#ifdef LL_K
+ for (i=0; i<LL_K; i++) zztokenLA[i] = buf->tokenLA[i];
+ for (i=0; i<LL_K; i++) strcpy(zztextLA[i], buf->textLA[i]);
+ zzlap = buf->lap;
+ zzlabase = buf->labase;
+#else
+ zztoken = buf->token;
+ strcpy(zzlextext, buf->text);
+#endif
+}
+
+/* Print to stderr the names of all tokens that are members of set a,
+ * wrapped in " {" ... " }" when the set has more than one member.
+ * The set is zzSET_SIZE words long; bit number e, counted across words from
+ * the lowest bit of the first word, stands for token zztokens[e].
+ */
+void
+zzedecode(SetWordType *a)
+{
+ register SetWordType *p = a;
+ register SetWordType *endp = &(p[zzSET_SIZE]);
+ register unsigned e = 0;
+
+ if ( zzset_deg(a)>1 ) fprintf(stderr, " {");
+ do {
+ register SetWordType t = *p;
+ register SetWordType *b = &(bitmask[0]);
+ do {
+ if ( t & *b ) fprintf(stderr, " %s", zztokens[e]);
+ e++;
+ } while (++b < &(bitmask[sizeof(SetWordType)*8]));
+ } while (++p < endp);
+ if ( zzset_deg(a)>1 ) fprintf(stderr, " }");
+}
+
+#ifndef USER_ZZSYN
+/* standard error reporting function */
+void
+zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text)
+{
+
+ fprintf(stderr, "line %d: syntax error at \"%s\"", zzline, (tok==zzEOF_TOKEN)?"EOF":bad_text);
+ if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
+ if ( k==1 ) fprintf(stderr, " missing");
+ else
+ {
+ fprintf(stderr, "; \"%s\" not", bad_text);
+ if ( zzset_deg(eset)>1 ) fprintf(stderr, " in");
+ }
+ if ( zzset_deg(eset)>0 ) zzedecode(eset);
+ else fprintf(stderr, " %s", zztokens[etok]);
+ if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup);
+ fprintf(stderr, "\n");
+}
+#endif
+
+/* is b an element of set p?
+ * Returns nonzero when bit (b mod BSETWORDSIZE) of word (b / BSETWORDSIZE)
+ * is set. b is not range-checked against the size of the set.
+ */
+int
+zzset_el(unsigned b, SetWordType *p)
+{
+ return( p[BSETDIVWORD(b)] & bitmask[BSETMODWORD(b)] );
+}
+
+/* Fast compute degree of a set... the number of elements present in the
+ * set. Assumes that all word bits are used in the set. The set is
+ * zzSET_SIZE words long; a NULL set has degree 0.
+ */
+int
+zzset_deg(SetWordType *a)
+{
+    register SetWordType *p;
+    register SetWordType *endp;
+    register int degree = 0;
+
+    /* test before forming &a[zzSET_SIZE]: arithmetic on a null pointer is undefined */
+    if ( a == NULL ) return 0;
+    endp = &(a[zzSET_SIZE]);
+    for (p = a; p < endp; p++)
+    {
+        register SetWordType t = *p;
+        register SetWordType *b = &(bitmask[0]);
+        /* count the bits of this word that are set */
+        do {
+            if (t & *b) ++degree;
+        } while (++b < &(bitmask[sizeof(SetWordType)*8]));
+    }
+
+    return(degree);
+}
+
+#ifdef DEMAND_LOOK
+
+#ifdef LL_K
+int
+_zzmatch(int _t, char **zzBadText, char **zzMissText,
+ int *zzMissTok, int *zzBadTok,
+ SetWordType **zzMissSet)
+{
+ if ( zzdirty==LL_K ) {
+ zzCONSUME;
+ }
+ if ( LA(1)!=_t ) {
+ *zzBadText = *zzMissText=LATEXT(1);
+ *zzMissTok= _t; *zzBadTok=LA(1);
+ *zzMissSet=NULL;
+ return 0;
+ }
+ zzMakeAttr
+ zzdirty++;
+ zzlabase++;
+ return 1;
+}
+
+int
+_zzmatch_wsig(int _t)
+{
+ if ( zzdirty==LL_K ) {
+ zzCONSUME;
+ }
+ if ( LA(1)!=_t ) {
+ return 0;
+ }
+ zzMakeAttr
+ zzdirty++;
+ zzlabase++;
+ return 1;
+}
+
+#else
+
+int
+_zzmatch(int _t, char **zzBadText, char **zzMissText,
+ int *zzMissTok, int *zzBadTok, SetWordType **zzMissSet)
+{
+ if ( zzdirty ) {zzCONSUME;}
+ if ( LA(1)!=_t ) {
+ *zzBadText = *zzMissText=LATEXT(1);
+ *zzMissTok= _t; *zzBadTok=LA(1);
+ *zzMissSet=NULL;
+ return 0;
+ }
+ zzdirty = 1;
+ zzMakeAttr
+ return 1;
+}
+
+int
+_zzmatch_wsig(int _t)
+{
+ if ( zzdirty ) {zzCONSUME;}
+ if ( LA(1)!=_t ) {
+ return 0;
+ }
+ zzdirty = 1;
+ zzMakeAttr
+ return 1;
+}
+
+#endif /*LL_K*/
+
+#else
+
+/* Variant used when DEMAND_LOOK is not defined.
+ * Test whether the next token is _t. On a mismatch the out-parameters are
+ * filled in (both texts point at the current token text, the missing token
+ * is _t, the bad token is LA(1), the missing set is NULL) and 0 is returned.
+ * On a match zzMakeAttr is expanded and 1 is returned; the token is not
+ * consumed here.
+ */
+int
+_zzmatch(int _t, const char **zzBadText, const char **zzMissText,
+ int *zzMissTok, int *zzBadTok,
+ SetWordType **zzMissSet)
+{
+ if ( LA(1)!=_t ) {
+ *zzBadText = *zzMissText=LATEXT(1);
+ *zzMissTok= _t; *zzBadTok=LA(1);
+ *zzMissSet=NULL;
+ return 0;
+ }
+ zzMakeAttr
+ return 1;
+}
+
+/* Variant used when DEMAND_LOOK is not defined.
+ * Like _zzmatch but without error details: returns 0 when the next token is
+ * not _t, otherwise expands zzMakeAttr and returns 1.
+ */
+int
+_zzmatch_wsig(int _t)
+{
+ if ( LA(1)!=_t ) return 0;
+ zzMakeAttr
+ return 1;
+}
+
+#endif /*DEMAND_LOOK*/
+
+#ifdef ZZINF_LOOK
+void
+_inf_zzgettok(void)
+{
+ if ( zzinf_labase >= zzinf_last )
+ {NLA = zzEOF_TOKEN; strcpy(NLATEXT, "");}
+ else {
+ NLA = zzinf_tokens[zzinf_labase];
+ zzline = zzinf_line[zzinf_labase]; /* wrong in 1.21 */
+ strcpy(NLATEXT, zzinf_text[zzinf_labase]);
+ zzinf_labase++;
+ }
+}
+#endif
+
+#ifdef ZZINF_LOOK
+/* allocate default size text,token and line arrays;
+ * then, read all of the input reallocing the arrays as needed.
+ * Once the number of total tokens is known, the LATEXT(i) array (zzinf_text)
+ * is allocated and it's pointers are set to the tokens in zzinf_text_buffer.
+ */
+void
+zzfill_inf_look(void)
+{
+ int tok, line;
+ int zzinf_token_buffer_size = ZZINF_DEF_TOKEN_BUFFER_SIZE;
+ int zzinf_text_buffer_size = ZZINF_DEF_TEXT_BUFFER_SIZE;
+ int zzinf_text_buffer_index = 0;
+ int zzinf_lap = 0;
+
+ /* allocate text/token buffers */
+ zzinf_text_buffer = (char *) malloc(zzinf_text_buffer_size);
+ if ( zzinf_text_buffer == NULL )
+ {
+ fprintf(stderr, "cannot allocate lookahead text buffer (%d bytes)\n",
+ zzinf_text_buffer_size);
+ exit(PCCTS_EXIT_FAILURE);
+ }
+ zzinf_tokens = (int *) calloc(zzinf_token_buffer_size,sizeof(int));
+ if ( zzinf_tokens == NULL )
+ {
+ fprintf(stderr, "cannot allocate token buffer (%d tokens)\n",
+ zzinf_token_buffer_size);
+ exit(PCCTS_EXIT_FAILURE);
+ }
+ zzinf_line = (int *) calloc(zzinf_token_buffer_size,sizeof(int));
+ if ( zzinf_line == NULL )
+ {
+ fprintf(stderr, "cannot allocate line buffer (%d ints)\n",
+ zzinf_token_buffer_size);
+ exit(PCCTS_EXIT_FAILURE);
+ }
+
+ /* get tokens, copying text to text buffer */
+ zzinf_text_buffer_index = 0;
+ do {
+ zzgettok();
+ line = zzreal_line;
+ while ( zzinf_lap>=zzinf_token_buffer_size )
+ {
+ zzinf_token_buffer_size += ZZINF_BUFFER_TOKEN_CHUNK_SIZE;
+ zzinf_tokens = (int *) realloc(zzinf_tokens,
+ zzinf_token_buffer_size*sizeof(int));
+ if ( zzinf_tokens == NULL )
+ {
+ fprintf(stderr, "cannot allocate lookahead token buffer (%d tokens)\n",
+ zzinf_token_buffer_size);
+ exit(PCCTS_EXIT_FAILURE);
+ }
+ zzinf_line = (int *) realloc(zzinf_line,
+ zzinf_token_buffer_size*sizeof(int));
+ if ( zzinf_line == NULL )
+ {
+ fprintf(stderr, "cannot allocate lookahead line buffer (%d ints)\n",
+ zzinf_token_buffer_size);
+ exit(PCCTS_EXIT_FAILURE);
+ }
+
+ }
+ while ( (zzinf_text_buffer_index+strlen(NLATEXT)+1) >= zzinf_text_buffer_size )
+ {
+ zzinf_text_buffer_size += ZZINF_BUFFER_TEXT_CHUNK_SIZE;
+ zzinf_text_buffer = (char *) realloc(zzinf_text_buffer,
+ zzinf_text_buffer_size);
+ if ( zzinf_text_buffer == NULL )
+ {
+ fprintf(stderr, "cannot allocate lookahead text buffer (%d bytes)\n",
+ zzinf_text_buffer_size);
+ exit(PCCTS_EXIT_FAILURE);
+ }
+ }
+ /* record token and text and line of input symbol */
+ tok = zzinf_tokens[zzinf_lap] = NLA;
+ strcpy(&zzinf_text_buffer[zzinf_text_buffer_index], NLATEXT);
+ zzinf_text_buffer_index += strlen(NLATEXT)+1;
+ zzinf_line[zzinf_lap] = line;
+ zzinf_lap++;
+ } while (tok!=zzEOF_TOKEN);
+ zzinf_labase = 0;
+ zzinf_last = zzinf_lap-1;
+
+ /* allocate ptrs to text of ith token */
+ zzinf_text = (char **) calloc(zzinf_last+1,sizeof(char *));
+ if ( zzinf_text == NULL )
+ {
+ fprintf(stderr, "cannot allocate lookahead text buffer (%d)\n",
+ zzinf_text_buffer_size);
+ exit(PCCTS_EXIT_FAILURE);
+ }
+ zzinf_text_buffer_index = 0;
+ zzinf_lap = 0;
+ /* set ptrs so that zzinf_text[i] is the text of the ith token found on input */
+ while (zzinf_lap<=zzinf_last)
+ {
+ zzinf_text[zzinf_lap++] = &zzinf_text_buffer[zzinf_text_buffer_index];
+ zzinf_text_buffer_index += strlen(&zzinf_text_buffer[zzinf_text_buffer_index])+1;
+ }
+}
+#endif
+
+/* Test whether the next token is a member of set e.
+ * On a mismatch the out-parameters are filled in (bad text/token describe
+ * LA(1), the missing text is NULL, the missing token is 0 and the missing
+ * set is e) and 0 is returned. On a match zzMakeAttr is expanded and 1 is
+ * returned. With DEMAND_LOOK, a pending token is consumed first and zzdirty
+ * is updated after a successful match.
+ */
+int
+_zzsetmatch(SetWordType *e, char **zzBadText, char **zzMissText,
+ int *zzMissTok, int *zzBadTok,
+ SetWordType **zzMissSet)
+{
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ if ( zzdirty==LL_K ) {zzCONSUME;}
+#else
+ if ( zzdirty ) {zzCONSUME;}
+#endif
+#endif
+ if ( !zzset_el((unsigned)LA(1), e) ) {
+ *zzBadText = LATEXT(1); *zzMissText=NULL;
+ *zzMissTok= 0; *zzBadTok=LA(1);
+ *zzMissSet=e;
+ return 0;
+ }
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ zzdirty++;
+#else
+ zzdirty = 1;
+#endif
+#endif
+ zzMakeAttr
+ return 1;
+}
+
+/* Match tokenWanted with built-in error handling.
+ * When the next token is not tokenWanted, a "syntax error ... missing"
+ * message naming zztokens[tokenWanted] is printed to stderr, input is
+ * skipped with zzconsumeUntil(whatFollows) and 0 is returned. Otherwise
+ * zzMakeAttr is expanded and 1 is returned; without DEMAND_LOOK the token is
+ * not consumed here (the zzCONSUME below is commented out).
+ */
+int
+_zzmatch_wdfltsig(int tokenWanted, SetWordType *whatFollows)
+{
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ if ( zzdirty==LL_K ) {
+ zzCONSUME;
+ }
+#else
+ if ( zzdirty ) {zzCONSUME;}
+#endif
+#endif
+
+ if ( LA(1)!=tokenWanted )
+ {
+ fprintf(stderr,
+ "line %d: syntax error at \"%s\" missing %s\n",
+ zzline,
+ (LA(1)==zzEOF_TOKEN)?"<eof>":(char*)LATEXT(1),
+ zztokens[tokenWanted]);
+ zzconsumeUntil( whatFollows );
+ return 0;
+ }
+ else {
+ zzMakeAttr
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ zzdirty++;
+ zzlabase++;
+#else
+ zzdirty = 1;
+#endif
+#else
+/* zzCONSUME; consume if not demand lookahead */
+#endif
+ return 1;
+ }
+}
+
+int
+_zzsetmatch_wdfltsig(SetWordType *tokensWanted,
+ int tokenTypeOfSet,
+ SetWordType *whatFollows)
+{
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ if ( zzdirty==LL_K ) {zzCONSUME;}
+#else
+ if ( zzdirty ) {zzCONSUME;}
+#endif
+#endif
+ if ( !zzset_el((unsigned)LA(1), tokensWanted) )
+ {
+ fprintf(stderr,
+ "line %d: syntax error at \"%s\" missing %s\n",
+ zzline,
+ (LA(1)==zzEOF_TOKEN)?"<eof>":(char*)LATEXT(1),
+ zztokens[tokenTypeOfSet]);
+ zzconsumeUntil( whatFollows );
+ return 0;
+ }
+ else {
+ zzMakeAttr
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ zzdirty++;
+ zzlabase++;
+#else
+ zzdirty = 1;
+#endif
+#else
+/* zzCONSUME; consume if not demand lookahead */
+#endif
+ return 1;
+ }
+}
+
+int
+_zzsetmatch_wsig(SetWordType *e)
+{
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ if ( zzdirty==LL_K ) {zzCONSUME;}
+#else
+ if ( zzdirty ) {zzCONSUME;}
+#endif
+#endif
+ if ( !zzset_el((unsigned)LA(1), e) ) return 0;
+#ifdef DEMAND_LOOK
+#ifdef LL_K
+ zzdirty++;
+#else
+ zzdirty = 1;
+#endif
+#endif
+ zzMakeAttr
+ return 1;
+}
+
+#ifdef USER_ZZMODE_STACK
+static int zzmstk[ZZMAXSTK] = { -1 };
+static int zzmdep = 0;
+static char zzmbuf[70];
+
+/* Push the current lexer mode (zzauto) on the mode stack and switch to
+ * mode m. Reports "Mode stack overflow" through zzerr, leaving the mode
+ * unchanged, once zzmdep has reached ZZMAXSTK - 1.
+ */
+void
+zzmpush( int m )
+{
+ if(zzmdep == ZZMAXSTK - 1) {
+ sprintf(zzmbuf, "Mode stack overflow ");
+ zzerr(zzmbuf);
+ } else {
+ zzmstk[zzmdep++] = zzauto;
+ zzmode(m);
+ }
+}
+
+/* Pop the most recently pushed lexer mode and switch back to it.
+ * Reports "Mode stack underflow" through zzerr when the stack is empty.
+ */
+void
+zzmpop( void )
+{
+ if(zzmdep == 0)
+ { sprintf(zzmbuf, "Mode stack underflow ");
+ zzerr(zzmbuf);
+ }
+ else
+ { zzmdep--;
+ zzmode(zzmstk[zzmdep]);
+ }
+}
+
+/* Copy the lexer mode stack into modeStack and its depth into *modeLevel,
+ * then empty the live stack (zzmdep = 0).
+ * The whole of zzmstk is copied, so modeStack must have room for ZZMAXSTK
+ * ints.
+ */
+void
+zzsave_mode_stack( int modeStack[], int *modeLevel )
+{
+    memcpy(modeStack, zzmstk, sizeof(zzmstk));
+    *modeLevel = zzmdep;
+    zzmdep = 0;
+}
+
+/* Inverse of zzsave_mode_stack: reload the lexer mode stack from modeStack
+ * and its depth from *modeLevel.
+ * The whole of zzmstk is overwritten, so modeStack must hold ZZMAXSTK ints.
+ */
+void
+zzrestore_mode_stack( int modeStack[], int *modeLevel )
+{
+    memcpy(zzmstk, modeStack, sizeof(zzmstk));
+    zzmdep = *modeLevel;
+}
+#endif /* USER_ZZMODE_STACK */
+
+#endif /* ERR_H */
diff --git a/src/translators/btparse/error.c b/src/translators/btparse/error.c
new file mode 100644
index 0000000..26f2fb2
--- /dev/null
+++ b/src/translators/btparse/error.c
@@ -0,0 +1,348 @@
+/* ------------------------------------------------------------------------
+@NAME : error.c
+@DESCRIPTION: Anything relating to reporting or recording errors and
+ warnings.
+@GLOBALS : errclass_names
+ err_actions
+ err_handlers
+ errclass_counts
+ error_buf
+@CALLS :
+@CREATED : 1996/08/28, Greg Ward
+@MODIFIED :
+@VERSION : $Id: error.c,v 2.5 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include "btparse.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+
+
+/* Size of the per-class tables below; assumes BTERR_INTERNAL is the
+ * highest-valued member of bt_errclass -- TODO confirm against btparse.h. */
+#define NUM_ERRCLASSES ((int) BTERR_INTERNAL + 1)
+
+
+/* Label print_error() shows for each class; NULL means "print no label". */
+static const char *errclass_names[NUM_ERRCLASSES] =
+{
+ NULL, /* BTERR_NOTIFY */
+ "warning", /* BTERR_CONTENT */
+ "warning", /* BTERR_LEXWARN */
+ "warning", /* BTERR_USAGEWARN */
+ "error", /* BTERR_LEXERR */
+ "syntax error", /* BTERR_SYNTAX */
+ "fatal error", /* BTERR_USAGEERR */
+ "internal error" /* BTERR_INTERNAL */
+};
+
+/* What report_error() does once the handler has run: return to the caller
+ * (BTACT_NONE), exit(1) (BTACT_CRASH) or abort() (BTACT_ABORT). */
+static const bt_erraction err_actions[NUM_ERRCLASSES] =
+{
+ BTACT_NONE, /* BTERR_NOTIFY */
+ BTACT_NONE, /* BTERR_CONTENT */
+ BTACT_NONE, /* BTERR_LEXWARN */
+ BTACT_NONE, /* BTERR_USAGEWARN */
+ BTACT_NONE, /* BTERR_LEXERR */
+ BTACT_NONE, /* BTERR_SYNTAX */
+ BTACT_CRASH, /* BTERR_USAGEERR */
+ BTACT_ABORT /* BTERR_INTERNAL */
+};
+
+void print_error (bt_error *err);
+
+/* Handler report_error() invokes for each class (a NULL entry would mean
+ * "no handler"); every class starts out with print_error. */
+static bt_err_handler err_handlers[NUM_ERRCLASSES] =
+{
+ print_error,
+ print_error,
+ print_error,
+ print_error,
+ print_error,
+ print_error,
+ print_error,
+ print_error
+};
+
+/* Number of errors reported so far in each class (bumped by report_error()). */
+static int errclass_counts[NUM_ERRCLASSES] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+/* Static buffer the formatted message is written to by report_error(). */
+static char error_buf[MAX_ERROR+1];
+
+
+/* ----------------------------------------------------------------------
+ * Error-handling functions.
+ */
+
+/*
+ * print_error -- default error handler: writes one diagnostic line to
+ * stderr.
+ *
+ * The line is built from whichever of these are available, separated by
+ * ", " and followed by ": " if anything at all was printed:
+ *   - err->filename                (if non-NULL)
+ *   - "line N"                     (if err->line > 0)
+ *   - "<item_desc> N"              (if err->item_desc non-NULL and err->item > 0)
+ *   - the label in errclass_names  (if non-NULL for err->errclass)
+ * It always ends with err->message and a newline.
+ *
+ * The file name and the class label are written with fputs() instead of
+ * being handed to fprintf() as the format string: a '%' in a file name
+ * must be printed literally, not interpreted as a conversion.
+ */
+void print_error (bt_error *err)
+{
+   const char * name;
+   boolean something_printed;
+
+   something_printed = FALSE;
+
+   if (err->filename)
+   {
+      fputs (err->filename, stderr);
+      something_printed = TRUE;
+   }
+   if (err->line > 0) /* going to print a line number? */
+   {
+      if (something_printed)
+         fprintf (stderr, ", ");
+      fprintf (stderr, "line %d", err->line);
+      something_printed = TRUE;
+   }
+   if (err->item_desc && err->item > 0) /* going to print an item number? */
+   {
+      if (something_printed)
+         fprintf (stderr, ", ");
+      fprintf (stderr, "%s %d", err->item_desc, err->item);
+      something_printed = TRUE;
+   }
+
+   name = errclass_names[(int) err->errclass];
+   if (name)
+   {
+      if (something_printed)
+         fprintf (stderr, ", ");
+      fputs (name, stderr);
+      something_printed = TRUE;
+   }
+
+   if (something_printed)
+      fprintf (stderr, ": ");
+
+   fprintf (stderr, "%s\n", err->message);
+
+} /* print_error() */
+
+
+
+/* ----------------------------------------------------------------------
+ * Error-reporting functions: these are called anywhere in the library
+ * when we encounter an error.
+ */
+
+/*
+ * report_error -- common back end of all the error-reporting wrappers.
+ *
+ * Fills in a bt_error from the arguments, bumps the counter for
+ * `errclass', formats the message (`fmt' + `arglist') into the static
+ * buffer error_buf, calls the handler registered for the class (if any),
+ * and finally carries out the class's action from err_actions[]:
+ *   BTACT_NONE  - return to the caller
+ *   BTACT_CRASH - exit (1)
+ *   BTACT_ABORT - abort ()
+ *
+ * err.message points into error_buf, so it is only valid until the next
+ * call to report_error().
+ */
+void
+report_error (bt_errclass errclass,
+              char * filename,
+              int line,
+              const char * item_desc,
+              int item,
+              const char * fmt,
+              va_list arglist)
+{
+   bt_error err;
+   const char * class_name;
+
+   err.errclass = errclass;
+   err.filename = filename;
+   err.line = line;
+   err.item_desc = item_desc;
+   err.item = item;
+
+   errclass_counts[(int) errclass]++;
+
+   /*
+    * We write to a static buffer because there is no easy way to know in
+    * advance how long the message will be.  vsnprintf() never writes more
+    * than the size it is given and always NUL-terminates, so an over-long
+    * message is simply truncated.  (The former vsprintf() fallback could
+    * only notice an overflow after memory had already been overwritten.)
+    */
+   vsnprintf (error_buf, sizeof error_buf, fmt, arglist);
+
+   err.message = error_buf;
+   if (err_handlers[errclass])
+      (*err_handlers[errclass]) (&err);
+
+   switch (err_actions[errclass])
+   {
+      case BTACT_NONE: return;
+      case BTACT_CRASH: exit (1);
+      case BTACT_ABORT: abort ();
+      default:
+         /* the class label may be NULL (BTERR_NOTIFY); never pass that to %s */
+         class_name = errclass_names[errclass];
+         internal_error ("invalid error action %d for class %d (%s)",
+                         (int) err_actions[errclass],
+                         (int) errclass,
+                         class_name ? class_name : "(none)");
+   }
+
+} /* report_error() */
+
+
+/*
+ * Variadic front ends to report_error(), generated with GEN_ERRFUNC from
+ * error.h: each one turns its trailing arguments into a va_list and
+ * forwards it together with the fixed values listed after the parameter
+ * list (class, filename, line, item_desc, item, format).
+ */
+
+/* general_error: the caller supplies every field. */
+GEN_ERRFUNC (general_error,
+ (bt_errclass errclass,
+ char * filename,
+ int line,
+ const char * item_desc,
+ int item,
+ char * fmt,
+ ...),
+ errclass, filename, line, item_desc, item, fmt)
+
+/* error: file name and line only; no item (item_desc NULL, item -1). */
+GEN_ERRFUNC (error,
+ (bt_errclass errclass,
+ char * filename,
+ int line,
+ char * fmt,
+ ...),
+ errclass, filename, line, NULL, -1, fmt)
+
+/* ast_error: file name and line are taken from the AST node; `ast' is
+ * dereferenced unconditionally, so it must not be NULL. */
+GEN_ERRFUNC (ast_error,
+ (bt_errclass errclass,
+ AST * ast,
+ char * fmt,
+ ...),
+ errclass, ast->filename, ast->line, NULL, -1, fmt)
+
+/* notify: class BTERR_NOTIFY, no location information. */
+GEN_ERRFUNC (notify,
+ (const char * fmt, ...),
+ BTERR_NOTIFY, NULL, -1, NULL, -1, fmt)
+
+/* usage_warning: class BTERR_USAGEWARN, no location information. */
+GEN_ERRFUNC (usage_warning,
+ (const char * fmt, ...),
+ BTERR_USAGEWARN, NULL, -1, NULL, -1, fmt)
+
+/* usage_error: class BTERR_USAGEERR, whose action in err_actions[] is
+ * BTACT_CRASH, i.e. report_error() calls exit (1). */
+GEN_ERRFUNC (usage_error,
+ (const char * fmt, ...),
+ BTERR_USAGEERR, NULL, -1, NULL, -1, fmt)
+
+/* internal_error: class BTERR_INTERNAL, whose action in err_actions[] is
+ * BTACT_ABORT, i.e. report_error() calls abort (). */
+GEN_ERRFUNC (internal_error,
+ (const char * fmt, ...),
+ BTERR_INTERNAL, NULL, -1, NULL, -1, fmt)
+
+
+/* ======================================================================
+ * Functions to be used outside of the library
+ */
+
+/* ------------------------------------------------------------------------
+@NAME : bt_reset_error_counts()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Resets all the error counters to zero.
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/08, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void bt_reset_error_counts (void)
+{
+   /* wipe the whole counter table in one go */
+   memset (errclass_counts, 0, sizeof errclass_counts);
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_get_error_count()
+@INPUT : errclass
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Returns number of errors seen in the specified class.
+@GLOBALS : errclass_counts
+@CALLS :
+@CREATED :
+@MODIFIED :
+-------------------------------------------------------------------------- */
+int bt_get_error_count (bt_errclass errclass)
+{
+   int idx = (int) errclass;
+
+   /*
+    * This is a public entry point, so don't trust the index: a value that
+    * is not a valid error class has seen no errors, and must not make us
+    * read outside errclass_counts[].
+    */
+   if (idx < 0 || idx >= NUM_ERRCLASSES)
+      return 0;
+
+   return errclass_counts[idx];
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME       : bt_get_error_counts()
+@INPUT      : counts - pointer to an array big enough to hold all the counts
+                       (NUM_ERRCLASSES ints); if NULL, the array will be
+                       allocated for you (and you must free() it when done
+                       with it)
+@OUTPUT     :
+@RETURNS    : counts - either the passed-in pointer, or the newly-
+                       allocated array if you pass in NULL; NULL if that
+                       allocation fails
+@DESCRIPTION: Copies the number of errors seen in each error class into
+              `counts', indexed by the members of the bt_errclass enum.
+@GLOBALS    : errclass_counts
+@CALLS      :
+@CREATED    : 1997/01/06, GPW
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+int *bt_get_error_counts (int *counts)
+{
+   int i;
+
+   if (counts == NULL)
+   {
+      counts = (int *) malloc (sizeof (int) * NUM_ERRCLASSES);
+      if (counts == NULL)               /* out of memory: nothing to fill */
+         return NULL;
+   }
+   for (i = 0; i < NUM_ERRCLASSES; i++)
+      counts[i] = errclass_counts[i];
+
+   return counts;
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_error_status
+@INPUT : saved_counts - an array of error counts as returned by
+ bt_get_error_counts, or NULL not to compare
+ to a previous checkpoint
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Computes a bitmap where a bit is set for each error class
+ that has more errors now than it used to have (or, if
+ saved_counts is NULL, the bit is set if there have been
+ any errors in the corresponding error class).
+
+ Eg. "x & (1<<BTERR_SYNTAX)" (where x is returned by bt_error_status)
+ is true if there have been any syntax errors.
+@GLOBALS :
+@CALLS :
+@CREATED :
+@MODIFIED :
+-------------------------------------------------------------------------- */
+ushort bt_error_status (int *saved_counts)
+{
+   ushort status = 0;
+   int    cls;
+
+   for (cls = 0; cls < NUM_ERRCLASSES; cls++)
+   {
+      /* without a checkpoint, compare against "no errors at all" */
+      int baseline = saved_counts ? saved_counts[cls] : 0;
+
+      if (errclass_counts[cls] > baseline)
+         status |= (1 << cls);
+   }
+
+   return status;
+} /* bt_error_status () */
diff --git a/src/translators/btparse/error.h b/src/translators/btparse/error.h
new file mode 100644
index 0000000..aede151
--- /dev/null
+++ b/src/translators/btparse/error.h
@@ -0,0 +1,65 @@
+/* ------------------------------------------------------------------------
+@NAME : error.h
+@DESCRIPTION: Prototypes for the error-generating functions (i.e. functions
+ defined in error.c, and meant only for use elsewhere in the
+ library).
+@CREATED : Summer 1996, Greg Ward
+@MODIFIED :
+@VERSION : $Id: error.h,v 1.11 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+#ifndef ERROR_H
+#define ERROR_H
+
+#include <stdarg.h>
+#include "btparse.h" /* for AST typedef */
+
+/* Capacity used for the formatted error message buffer in error.c. */
+#define MAX_ERROR 1024
+
+/*
+ * Body shared by all variadic error functions: wraps the arguments that
+ * follow `format' in a va_list and passes everything to report_error().
+ * `format' must name the last fixed parameter of the enclosing function,
+ * because it is what va_start() is given.
+ */
+#define ERRFUNC_BODY(class,filename,line,item_desc,item,format) \
+{ \
+ va_list arglist; \
+ \
+ va_start (arglist, format); \
+ report_error (class, filename, line, item_desc, item, format, arglist); \
+ va_end (arglist); \
+}
+
+/*
+ * Defines `void name params' with ERRFUNC_BODY as its body.  `params' is
+ * the complete parenthesized parameter list (ending in `...'); the other
+ * arguments are the expressions handed on to report_error().
+ */
+#define GEN_ERRFUNC(name,params,class,filename,line,item_desc,item,format) \
+void name params \
+ERRFUNC_BODY (class, filename, line, item_desc, item, format)
+
+/* Same as GEN_ERRFUNC, but the generated function is `static'. */
+#define GEN_PRIVATE_ERRFUNC(name,params, \
+ class,filename,line,item_desc,item,format) \
+static GEN_ERRFUNC(name,params,class,filename,line,item_desc,item,format)
+
+/*
+ * Prototypes for functions exported by error.c but only used within
+ * the library -- functions that can be called by outsiders are declared
+ * in btparse.h.
+ */
+
+void print_error (bt_error *err);
+void report_error (bt_errclass class,
+ char * filename, int line, const char * item_desc, int item,
+ const char * format, va_list arglist);
+
+void general_error (bt_errclass class,
+ char * filename, int line, const char * item_desc, int item,
+ char * format, ...);
+void error (bt_errclass class, char * filename, int line, char * format, ...);
+void ast_error (bt_errclass class, AST * ast, char * format, ...);
+
+void notify (const char *format,...);
+void usage_warning (const char * format, ...);
+void usage_error (const char * format, ...);
+void internal_error (const char * format, ...);
+
+#endif
diff --git a/src/translators/btparse/format_name.c b/src/translators/btparse/format_name.c
new file mode 100644
index 0000000..d6c99ae
--- /dev/null
+++ b/src/translators/btparse/format_name.c
@@ -0,0 +1,841 @@
+/* ------------------------------------------------------------------------
+@NAME : format_name.c
+@DESCRIPTION: bt_format_name() and support functions: everything needed
+ to turn a bt_name structure (as returned by bt_split_name())
+ back into a string according to a highly customizable format.
+@GLOBALS :
+@CREATED :
+@MODIFIED :
+@VERSION : $Id: format_name.c,v 1.12 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "btparse.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+#include "bt_debug.h"
+
+
+/* Shared empty string: bt_create_name_format() points every pre-/post-
+ * part and pre-/post-token text slot at it by default. */
+static char EmptyString[] = "";
+
+
+#if DEBUG
+/* prototypes to shut "gcc -Wmissing-prototypes" up */
+void print_tokens (char *partname, char **tokens, int num_tokens);
+void dump_name (bt_name * name);
+void dump_format (bt_name_format * format);
+#endif
+
+
+/* ----------------------------------------------------------------------
+ * Interface to create/customize bt_name_format structures
+ */
+
+/* ------------------------------------------------------------------------
+@NAME       : bt_create_name_format
+@INPUT      : parts - a string of letters (maximum four, from the set
+                      f, v, l, j, with no repetition) denoting the order
+                      and presence of name parts.  Also used to determine
+                      certain pre-part text strings.
+              abbrev_first - flag: should first names be abbreviated?
+@OUTPUT     :
+@RETURNS    : the newly allocated format structure, or NULL if the
+              allocation failed
+@DESCRIPTION: Creates a bt_name_format structure, slightly customized
+              according to the caller's choice of token order and
+              whether to abbreviate the first name.  Use
+              bt_free_name_format() to free the structure (and any sub-
+              structures that may be allocated here).  Use
+              bt_set_format_text() and bt_set_format_options() for
+              further customization of the format structure; do not
+              fiddle its fields directly.
+
+              Fills in the structure's `parts' field according to `parts'
+              string: 'f' -> BTN_FIRST, and so on.
+
+              Sets token join methods: inter-token join (within each part)
+              is set to BTJ_MAYTIE (a "discretionary tie") for all parts;
+              inter-part join is set to BTJ_SPACE, except for a 'von'
+              token immediately preceding a 'last' token; there, we have
+              a discretionary tie.  This is done for every name part,
+              including parts not mentioned in `parts'.
+
+              Sets abbreviation flags: FALSE for everything except `first',
+              which follows `abbrev_first' argument.
+
+              Sets surrounding text (pre- and post-part, pre- and post-
+              token): empty string for everything, except:
+                - post-token for 'first' is "." if abbrev_first true
+                - if 'jr' immediately preceded by 'last':
+                  pre-part for 'jr' is ", ", join for 'last' is nothing
+                - if 'first' immediately preceded by 'last'
+                  pre-part for 'first' is ", " , join for 'last' is nothing
+                - if 'first' immediately preceded by 'jr' and 'jr' immediately
+                  preceded by 'last':
+                  pre-part for 'first' and 'jr' is ", " ,
+                  join for 'last' and 'jr' is nothing
+
+              An over-long part list or an invalid letter is reported
+              through usage_error().
+@CREATED    : 1997/11/02, GPW
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+bt_name_format *
+bt_create_name_format (char * parts, boolean abbrev_first)
+{
+   int num_parts;
+   int num_valid_parts;
+   bt_name_format *
+      format;
+   int part_pos[BT_MAX_NAMEPARTS];
+   int i;
+
+   /*
+    * Start with every part marked "absent".  The sentinel is -2 rather
+    * than -1 so that none of the "X immediately follows Y" tests below
+    * (part_pos[Y] + 1 == part_pos[X]) can succeed by accident: an absent
+    * Y yields -1, which equals neither a real position (>= 0) nor the
+    * sentinel itself.
+    */
+   for (i = 0; i < BT_MAX_NAMEPARTS; i++)
+      part_pos[i] = -2;
+
+   /*
+    * Check that the part list (a string with one letter -- f, v, l, or j
+    * -- for each part) is valid: no longer than four characters, and no
+    * invalid characters.
+    */
+
+   num_parts = strlen (parts);
+   num_valid_parts = strspn (parts, BT_VALID_NAMEPARTS);
+   if (num_parts > BT_MAX_NAMEPARTS)
+   {
+      usage_error ("bt_create_name_format: part list must have no more than "
+                   "%d letters", BT_MAX_NAMEPARTS);
+   }
+   if (num_valid_parts != num_parts)
+   {
+      usage_error ("bt_create_name_format: bad part abbreviation \"%c\" "
+                   "(must be one of \"%s\")",
+                   parts[num_valid_parts], BT_VALID_NAMEPARTS);
+   }
+
+
+   /* User input is OK -- let's create the structure */
+
+   format = (bt_name_format *) malloc (sizeof (bt_name_format));
+   if (format == NULL)                  /* out of memory */
+      return NULL;
+
+   format->num_parts = num_parts;
+   for (i = 0; i < num_parts; i++)
+   {
+      switch (parts[i])
+      {
+         case 'f': format->parts[i] = BTN_FIRST; break;
+         case 'v': format->parts[i] = BTN_VON; break;
+         case 'l': format->parts[i] = BTN_LAST; break;
+         case 'j': format->parts[i] = BTN_JR; break;
+         default: internal_error ("bad part abbreviation \"%c\"", parts[i]);
+      }
+      part_pos[format->parts[i]] = i;
+   }
+   for (; i < BT_MAX_NAMEPARTS; i++)
+   {
+      format->parts[i] = BTN_NONE;
+   }
+
+
+   /*
+    * Set the token join methods: between tokens for all parts is a
+    * discretionary tie, and the join between parts is a space (except for
+    * 'von': if followed by 'last', we will have a discretionary tie).
+    *
+    * These arrays are indexed by name part (see the BTN_VON/BTN_LAST
+    * assignments below), not by position in `parts', so every slot is
+    * initialized -- not just the first num_parts of them.
+    */
+   for (i = 0; i < BT_MAX_NAMEPARTS; i++)
+   {
+      format->join_tokens[i] = BTJ_MAYTIE;
+      format->join_part[i] = BTJ_SPACE;
+   }
+   if (part_pos[BTN_VON] + 1 == part_pos[BTN_LAST])
+      format->join_part[BTN_VON] = BTJ_MAYTIE;
+
+
+   /*
+    * Now the abbreviation flags: follow 'abbrev_first' flag for 'first',
+    * and FALSE for everything else.
+    */
+   format->abbrev[BTN_FIRST] = abbrev_first;
+   format->abbrev[BTN_VON] = FALSE;
+   format->abbrev[BTN_LAST] = FALSE;
+   format->abbrev[BTN_JR] = FALSE;
+
+
+
+   /*
+    * Now fill in the "surrounding text" fields (pre- and post-part, pre-
+    * and post-token) -- start out with everything empty, and then tweak
+    * it to handle abbreviated first names, 'jr' following 'last', and
+    * 'first' following 'last' or 'last' and 'jr'.  In the last three
+    * cases, we put in some pre-part text (", "), and also set the join
+    * method for the *previous* part (jr or last) to BTJ_NOTHING, so we
+    * don't get extraneous space before the ", ".
+    */
+   for (i = 0; i < BT_MAX_NAMEPARTS; i++)
+   {
+      format->pre_part[i] = EmptyString;
+      format->post_part[i] = EmptyString;
+      format->pre_token[i] = EmptyString;
+      format->post_token[i] = EmptyString;
+   }
+
+   /* abbreviated first name:
+    * "Blow J" -> "Blow J.", or "J Blow" -> "J. Blow"
+    */
+   if (abbrev_first)
+   {
+      format->post_token[BTN_FIRST] = ".";
+   }
+   /* 'jr' after 'last': "Joe Blow Jr." -> "Joe Blow, Jr." */
+   if (part_pos[BTN_JR] == part_pos[BTN_LAST]+1)
+   {
+      format->pre_part[BTN_JR] = ", ";
+      format->join_part[BTN_LAST] = BTJ_NOTHING;
+      /* 'first' after 'last' and 'jr': "Blow, Jr. Joe"->"Blow, Jr., Joe" */
+      if (part_pos[BTN_FIRST] == part_pos[BTN_JR]+1)
+      {
+         format->pre_part[BTN_FIRST] = ", ";
+         format->join_part[BTN_JR] = BTJ_NOTHING;
+      }
+   }
+   /* first after last: "Blow Joe" -> "Blow, Joe" */
+   if (part_pos[BTN_FIRST] == part_pos[BTN_LAST]+1)
+   {
+      format->pre_part[BTN_FIRST] = ", ";
+      format->join_part[BTN_LAST] = BTJ_NOTHING;
+   }
+
+   DBG_ACTION
+      (1, printf ("bt_create_name_format(): returning structure %p\n", format))
+
+   return format;
+
+} /* bt_create_name_format() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_free_name_format()
+@INPUT : format - free()'d, so this is an invalid pointer after the call
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Frees a bt_name_format structure created by
+ bt_create_name_format().
+@CREATED : 1997/11/02, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+/* Releases only the structure itself; the text strings it points to are
+ * not freed here (bt_set_format_text() stores the caller's pointers
+ * without copying them). */
+void
+bt_free_name_format (bt_name_format * format)
+{
+ free (format);
+}
+
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_set_format_text
+@INPUT : format - the format structure to update
+ part - which name-part to change the surrounding text for
+ pre_part - "pre-part" text, or NULL to leave alone
+ post_part - "post-part" text, or NULL to leave alone
+ pre_token - "pre-token" text, or NULL to leave alone
+ post_token - "post-token" text, or NULL to leave alone
+@OUTPUT : format - pre_part, post_part, pre_token, post_token
+ arrays updated (only those with corresponding
+ non-NULL parameters are touched)
+@RETURNS :
+@DESCRIPTION: Sets the "surrounding text" for a particular name part in
+ a name format structure.
+@CREATED : 1997/11/02, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_set_format_text (bt_name_format * format,
+                    bt_namepart part,
+                    char * pre_part,
+                    char * post_part,
+                    char * pre_token,
+                    char * post_token)
+{
+   /* A NULL argument means "keep the text that is already there".  The
+    * pointers are stored as-is; nothing is copied. */
+   if (pre_part != NULL)
+   {
+      format->pre_part[part] = pre_part;
+   }
+   if (post_part != NULL)
+   {
+      format->post_part[part] = post_part;
+   }
+   if (pre_token != NULL)
+   {
+      format->pre_token[part] = pre_token;
+   }
+   if (post_token != NULL)
+   {
+      format->post_token[part] = post_token;
+   }
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_set_format_options()
+@INPUT : format
+ part
+ abbrev
+ join_tokens
+ join_part
+@OUTPUT : format - abbrev, join_tokens, join_part arrays all updated
+@RETURNS :
+@DESCRIPTION: Sets various formatting options for a particular name part in
+ a name format structure.
+@CREATED : 1997/11/02, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_set_format_options (bt_name_format * format,
+                       bt_namepart part,
+                       boolean abbrev,
+                       bt_joinmethod join_tokens,
+                       bt_joinmethod join_part)
+{
+   /* all three options are overwritten unconditionally for `part' */
+   format->join_part[part]   = join_part;
+   format->join_tokens[part] = join_tokens;
+   format->abbrev[part]      = abbrev;
+}
+
+
+
+/* ----------------------------------------------------------------------
+ * Functions for actually formatting a name (given a name and a name
+ * format structure).
+ */
+
+/* ------------------------------------------------------------------------
+@NAME : count_virtual_char()
+@INPUT : string
+ offset
+@OUTPUT : vchar_count
+@INOUT : depth
+ in_special
+@RETURNS :
+@DESCRIPTION: Munches a single physical character from a string, updating
+ the virtual character count, the depth, and an "in special
+ character" flag.
+
+ The virtual character count is incremented by any character
+ not part of a special character, and also by the right-brace
+ that closes a special character. The depth is incremented by
+ a left brace, and decremented by a right brace. in_special
+ is set to TRUE when we encounter a left brace at depth zero
+ that is immediately followed by a backslash; it is set to
+ false when we encounter the end of the special character,
+ i.e. when in_special is TRUE and we hit a right brace that
+ brings us back to depth zero.
+
+ *vchar_count and *depth should both be set to zero the first
+ time you call count_virtual_char() on a particular string,
+ and in_special should be set to FALSE.
+@CALLS :
+@CALLERS : string_length()
+ string_prefix()
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+count_virtual_char (char * string,
+                    int offset,
+                    int * vchar_count,
+                    int * depth,
+                    boolean * in_special)
+{
+   const char ch = string[offset];
+
+   if (ch == '{')
+   {
+      /* a brace at depth zero directly followed by a backslash opens a
+       * special character */
+      if (*depth == 0 && string[offset+1] == '\\')
+         *in_special = TRUE;
+      *depth += 1;
+   }
+   else if (ch == '}')
+   {
+      /* the brace that takes us back to depth zero closes the special
+       * character, which counts as one virtual character in total */
+      if (*depth == 1 && *in_special)
+      {
+         *in_special = FALSE;
+         *vchar_count += 1;
+      }
+      *depth -= 1;
+   }
+   else if (! *in_special)
+   {
+      /* ordinary character outside any special character */
+      *vchar_count += 1;
+   }
+} /* count_virtual_char () */
+
+
+/* this should probably be publicly available, documented, etc. */
+/* ------------------------------------------------------------------------
+@NAME : string_length()
+@INPUT : string
+@OUTPUT :
+@RETURNS : "virtual length" of `string'
+@DESCRIPTION: Counts the number of "virtual characters" in a string. A
+ virtual character is either an entire BibTeX special character,
+ or any character outside of a special character.
+
+ Thus, "Hello" has virtual length 5, and so does
+ "H{\\'e}ll{\\\"o}". "{\\noop Hello there how are you?}" has
+ virtual length one.
+@CALLS : count_virtual_char()
+@CALLERS : format_name()
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static int
+string_length (char * string)
+{
+   int     virtual_len = 0;
+   int     brace_depth = 0;
+   boolean inside_special = FALSE;
+   int     pos = 0;
+
+   /* feed every physical character to the counter, one at a time */
+   while (string[pos] != 0)
+   {
+      count_virtual_char (string, pos,
+                          &virtual_len, &brace_depth, &inside_special);
+      pos++;
+   }
+
+   return virtual_len;
+} /* string_length() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : string_prefix()
+@INPUT : string
+ prefix_len
+@OUTPUT :
+@RETURNS : physical length of the prefix of `string' with a virtual length
+ of `prefix_len'
+@DESCRIPTION: Counts the number of physical characters from the beginning
+ of `string' needed to extract a sub-string with virtual
+ length `prefix_len'.
+@CALLS : count_virtual_char()
+@CALLERS : format_name()
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static int
+string_prefix (char * string, int prefix_len)
+{
+   int     pos = 0;
+   int     virtual_seen = 0;
+   int     brace_depth = 0;
+   boolean inside_special = FALSE;
+
+   while (string[pos] != 0)
+   {
+      count_virtual_char (string, pos,
+                          &virtual_seen, &brace_depth, &inside_special);
+      pos++;
+      /* stop right after the physical character that completed the
+       * requested number of virtual characters */
+      if (virtual_seen == prefix_len)
+         return pos;
+   }
+
+   /* string is shorter than the requested prefix: use all of it */
+   return pos;
+
+} /* string_prefix() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : append_text()
+@INOUT : string
+@INPUT : offset
+ text
+ start
+ len
+@OUTPUT :
+@RETURNS : number of characters copied from text+start to string+offset
+@DESCRIPTION: Copies at most `len' characters from text+start to
+ string+offset. (I don't use strcpy() or strncpy() for this
+ because I need to get the number of characters actually
+ copied.)
+@CALLS :
+@CALLERS : format_name()
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static int
+append_text (char * string,
+             int offset,
+             const char * text,
+             int start,
+             int len)
+{
+   const char * src;
+   char *       dst;
+   int          copied = 0;
+
+   if (text == NULL)                    /* no text -- none appended! */
+      return 0;
+
+   src = text + start;
+   dst = string + offset;
+
+   /* The limit only applies when `len' is positive; zero or a negative
+    * value means "copy up to the end of text".  No terminating NUL is
+    * written. */
+   while (src[copied] != 0 && !(len > 0 && copied == len))
+   {
+      dst[copied] = src[copied];
+      copied++;
+   }
+
+   return copied;                       /* number of characters copied */
+
+} /* append_text () */
+
+
+/* ------------------------------------------------------------------------
+@NAME : append_join
+@INOUT : string
+@INPUT : offset
+ method
+ should_tie
+@OUTPUT :
+@RETURNS : number of characters appended to string+offset (either 0 or 1)
+@DESCRIPTION: Copies a "join character" ('~' or ' ') or nothing to
+ string+offset, according to the join method specified by
+ `method' and the `should_tie' flag.
+
+ Specifically: if `method' is BTJ_SPACE, a space is appended
+ and 1 is returned; if `method' is BTJ_FORCETIE, a TeX "tie"
+ character ('~') is appended and 1 is returned. If `method'
+ is BTJ_NOTHING, `string' is unchanged and 0 is returned. If
+ `method' is BTJ_MAYTIE then either a tie (if should_tie is
+ true) or a space (otherwise) is appended, and 1 is returned.
+@CALLS :
+@CALLERS : format_name()
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+@COMMENTS : This should allow "tie" strings other than TeX's '~' -- I
+ think this could be done by putting a "tie string" field in
+ the name format structure, and using it here.
+-------------------------------------------------------------------------- */
+static int
+append_join (char * string,
+             int offset,
+             bt_joinmethod method,
+             boolean should_tie)
+{
+   char joiner;
+
+   /* first decide which character (if any) joins the two pieces ... */
+   switch (method)
+   {
+      case BTJ_NOTHING:
+         return 0;
+      case BTJ_SPACE:
+         joiner = ' ';
+         break;
+      case BTJ_FORCETIE:
+         joiner = '~';
+         break;
+      case BTJ_MAYTIE:                  /* a "discretionary tie" -- pay */
+         joiner = should_tie ? '~' : ' '; /* attention to should_tie   */
+         break;
+      default:
+         internal_error ("bad token join method %d", (int) method);
+         return 0;                      /* can't happen -- just here to */
+                                        /* keep gcc -Wall happy         */
+   }
+
+   /* ... then store it */
+   string[offset] = joiner;
+   return 1;
+} /* append_join () */
+
+
+/* Length of `s', with a NULL pointer counting as the empty string.  The
+ * argument and the whole expansion are parenthesized so the macro can be
+ * used as an operand of a larger expression; note that `s' is evaluated
+ * twice. */
+#define STRLEN(s) (((s) == NULL) ? 0 : strlen (s))
+
+/* ------------------------------------------------------------------------
+@NAME : format_firstpass()
+@INPUT : name
+ format
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Makes the first pass over a name for formatting, in order to
+ establish an upper bound on the length of the formatted name.
+@CALLS :
+@CALLERS : bt_format_name()
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static unsigned
+format_firstpass (bt_name * name,
+                  bt_name_format * format)
+{
+   unsigned bound = 0;                  /* running upper bound */
+   int      p;                          /* loop over parts */
+
+   for (p = 0; p < format->num_parts; p++)
+   {
+      bt_namepart part = format->parts[p];
+      char **     tok = name->parts[part];
+      int         num_tok = name->part_len[part];
+      int         t;                    /* loop over tokens */
+
+      assert ((tok != NULL) == (num_tok > 0));
+      if (tok == NULL)                  /* name doesn't have this part */
+         continue;
+
+      /* surrounding text: once per part, or once per token */
+      bound += STRLEN (format->pre_part[part]);
+      bound += STRLEN (format->post_part[part]);
+      bound += STRLEN (format->pre_token[part]) * num_tok;
+      bound += STRLEN (format->post_token[part]) * num_tok;
+
+      /* one join char per token, plus join char to next part */
+      bound += num_tok + 1;
+
+      /*
+       * Abbreviation is ignored here: counting full tokens can only
+       * overestimate the length, and it spares us computing the physical
+       * length of each token's one-virtual-character prefix.
+       */
+      for (t = 0; t < num_tok; t++)
+         bound += strlen (tok[t]);
+
+   } /* for p (loop over parts) */
+
+   return bound;
+
+} /* format_firstpass() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : format_name()
+@INPUT : format
+ tokens - token lists, one per name part (indexed by bt_namepart)
+ num_tokens - token counts, one per name part (indexed by bt_namepart)
+@OUTPUT : fname - filled in, must be preallocated by caller
+@RETURNS :
+@DESCRIPTION: Performs the second pass over a name and format, to actually
+ put the name into a single string according to `format'.
+@CALLS :
+@CALLERS : bt_format_name()
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+/*
+ * Writes the formatted name into `fname' and NUL-terminates it.  No bounds
+ * are checked here, so `fname' must already be large enough -- presumably
+ * sized from format_firstpass()'s upper bound; confirm in bt_format_name().
+ */
+static void
+format_name (bt_name_format * format,
+ char *** tokens,
+ int * num_tokens,
+ char * fname)
+{
+ bt_namepart parts[BT_MAX_NAMEPARTS]; /* culled list from format */
+ int num_parts;
+
+ int offset; /* into fname */
+ int i; /* loop over parts */
+ int j; /* loop over tokens */
+ bt_namepart part;
+ int prefix_len;
+ int token_len; /* "physical" length (characters) */
+ int token_vlen; /* "virtual" length (special char */
+ /* counts as one character) */
+ boolean should_tie;
+
+ /*
+ * Cull format->parts down by keeping only those parts that are actually
+ * present in the current name (keeps the main loop simpler: makes it
+ * easy to know if the "next part" is present or not, so we know whether
+ * to append a join character).
+ */
+ num_parts = 0;
+ for (i = 0; i < format->num_parts; i++)
+ {
+ part = format->parts[i];
+ if (tokens[part]) /* name actually has this part */
+ parts[num_parts++] = part;
+ }
+
+ offset = 0;
+ token_vlen = -1; /* sanity check, and keeps */
+ /* "gcc -O -Wall" happy */
+
+ for (i = 0; i < num_parts; i++)
+ {
+ part = parts[i];
+
+ /* a length of -1 tells append_text() to copy the whole string */
+ offset += append_text (fname, offset,
+ format->pre_part[part], 0, -1);
+
+ for (j = 0; j < num_tokens[part]; j++)
+ {
+ offset += append_text (fname, offset,
+ format->pre_token[part], 0, -1);
+ if (format->abbrev[part])
+ {
+ /* abbreviating: keep only the first virtual character, which may */
+ /* span several physical characters */
+ prefix_len = string_prefix (tokens[part][j], 1);
+ token_len = append_text (fname, offset,
+ tokens[part][j], 0, prefix_len);
+ token_vlen = 1;
+ }
+ else
+ {
+ token_len = append_text (fname, offset,
+ tokens[part][j], 0, -1);
+ token_vlen = string_length (tokens[part][j]);
+ }
+ offset += token_len;
+ offset += append_text (fname, offset,
+ format->post_token[part], 0, -1);
+
+ /* join to next token, but only if there is a next token! */
+ if (j < num_tokens[part]-1)
+ {
+ /* tie after a short (fewer than 3 virtual chars) first token, */
+ /* and always between the last two tokens of the part */
+ should_tie = (num_tokens[part] > 1)
+ && (((j == 0) && (token_vlen < 3))
+ || (j == num_tokens[part]-2));
+ offset += append_join (fname, offset,
+ format->join_tokens[part], should_tie);
+ }
+
+ } /* for j */
+
+ /* here token_vlen is the virtual length of the part's last token */
+ offset += append_text (fname, offset,
+ format->post_part[part], 0, -1);
+ /* join to the next part, but again only if there is a next part */
+ if (i < num_parts-1)
+ {
+ if (token_vlen == -1)
+ {
+ internal_error ("token_vlen uninitialized -- no tokens in a part "
+ "that I checked existed");
+ }
+ /* tie to the next part only when this part is one short token */
+ should_tie = (num_tokens[part] == 1 && token_vlen < 3);
+ offset += append_join (fname, offset,
+ format->join_part[part], should_tie);
+ }
+
+ } /* for i (loop over parts) */
+
+ fname[offset] = 0;
+
+} /* format_name () */
+
+
+#if DEBUG
+
+#define STATIC /* so BibTeX.xs can call 'em too */
+
+/* borrowed print_tokens() and dump_name() from t/name_test.c */
+STATIC void
+print_tokens (char *partname, char **tokens, int num_tokens)
+{
+   int idx;
+
+   /* a NULL token list prints nothing at all */
+   if (tokens == NULL)
+      return;
+
+   /* "<partname> = (tok|tok|...)" -- the last token is closed with ')' */
+   printf ("%s = (", partname);
+   for (idx = 0; idx < num_tokens; idx++)
+   {
+      int terminator = (idx == num_tokens-1) ? ')' : '|';
+
+      printf ("%s%c", tokens[idx], terminator);
+   }
+   putchar ('\n');
+}
+
+
+/*
+ * dump_name()
+ *
+ * Debugging aid: prints the address of a split name, its total token
+ * count, and the tokens of each of its four parts (first, von, last, jr).
+ * A NULL name, or a name with a NULL token list, is reported as such.
+ */
+STATIC void
+dump_name (bt_name * name)
+{
+   if (name == NULL)
+   {
+      printf (" name: null\n");
+      return;
+   }
+
+   if (name->tokens == NULL)
+   {
+      printf (" name: null token list\n");
+      return;
+   }
+
+   /* %p requires a void * argument, hence the cast */
+   printf (" name (%p):\n", (void *) name);
+   printf (" total number of tokens = %d\n", name->tokens->num_items);
+   print_tokens (" first", name->parts[BTN_FIRST], name->part_len[BTN_FIRST]);
+   print_tokens (" von", name->parts[BTN_VON], name->part_len[BTN_VON]);
+   print_tokens (" last", name->parts[BTN_LAST], name->part_len[BTN_LAST]);
+   print_tokens (" jr", name->parts[BTN_JR], name->part_len[BTN_JR]);
+}
+
+
+/*
+ * dump_format()
+ *
+ * Debugging aid: prints a name format -- the order of its parts, then for
+ * every possible part the pre/post text for the part and for each token,
+ * whether the part is abbreviated, and its two join methods.
+ */
+STATIC void
+dump_format (bt_name_format * format)
+{
+   int i;
+   static char * nameparts[] = { "first", "von", "last", "jr" };
+   static char * joinmethods[] = {"may tie", "space", "force tie", "nothing"};
+
+   /* %p requires a void * argument, hence the casts below */
+   printf (" name format (%p):\n", (void *) format);
+   printf (" order:");
+   for (i = 0; i < format->num_parts; i++)
+      printf (" %s", nameparts[format->parts[i]]);
+   printf ("\n");
+
+   for (i = 0; i < BT_MAX_NAMEPARTS; i++)
+   {
+      printf (" %-5s: pre-part=%p (%s), post-part=%p (%s)\n",
+              nameparts[i],
+              (void *) format->pre_part[i], format->pre_part[i],
+              (void *) format->post_part[i], format->post_part[i]);
+      printf (" %-5s pre-token=%p (%s), post-token=%p (%s)\n",
+              "",
+              (void *) format->pre_token[i], format->pre_token[i],
+              (void *) format->post_token[i], format->post_token[i]);
+      printf (" %-5s abbrev=%s, join_tokens=%s, join_parts=%s\n",
+              "",
+              format->abbrev[i] ? "yes" : "no",
+              joinmethods[format->join_tokens[i]],
+              joinmethods[format->join_part[i]]);
+   }
+}
+#endif
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_format_name()
+@INPUT : name
+ format
+@OUTPUT :
+@RETURNS : formatted name (allocated with malloc(); caller must free() it)
+@DESCRIPTION: Formats an already-split name according to a pre-constructed
+ format structure.
+@GLOBALS :
+@CALLS : format_firstpass(), format_name()
+@CALLERS :
+@CREATED : 1997/11/03, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+char *
+bt_format_name (bt_name * name,
+                bt_name_format * format)
+{
+   unsigned max_length;
+   char * fname;
+
+#if DEBUG >= 2
+   printf ("bt_format_name():\n");
+   dump_name (name);
+   dump_format (format);
+#endif
+
+   /* first pass reports the buffer size needed; add one for the NUL */
+   max_length = format_firstpass (name, format);
+   fname = (char *) malloc ((max_length+1) * sizeof (char));
+   if (fname == NULL)                   /* out of memory: nothing to format */
+      return NULL;
+#if 0
+   memset (fname, '_', max_length);
+   fname[max_length] = 0;
+#endif
+   /* second pass writes the formatted name into fname */
+   format_name (format, name->parts, name->part_len, fname);
+   assert (strlen (fname) <= max_length);
+   return fname;                        /* caller must free() this */
+
+} /* bt_format_name() */
diff --git a/src/translators/btparse/init.c b/src/translators/btparse/init.c
new file mode 100644
index 0000000..4a1ec06
--- /dev/null
+++ b/src/translators/btparse/init.c
@@ -0,0 +1,42 @@
+/* ------------------------------------------------------------------------
+@NAME : init.c
+@DESCRIPTION: Initialization and cleanup functions for the btparse library.
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/19, Greg Ward
+@MODIFIED :
+@VERSION : $Id: init.c,v 1.8 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include "stdpccts.h" /* for zzfree_ast() prototype */
+#include "parse_auxiliary.h" /* for fix_token_names() proto */
+#include "prototypes.h" /* for other prototypes */
+/*#include "my_dmalloc.h"*/
+
+/* Library start-up: runs fix_token_names() and then init_macros(). */
+void bt_initialize (void)
+{
+ /* Initialize data structures */
+
+ fix_token_names ();
+ init_macros ();
+}
+
+
+/* Releases an AST by handing it to zzfree_ast(). */
+void bt_free_ast (AST *ast)
+{
+ zzfree_ast (ast);
+}
+
+
+/* Library shutdown: calls done_macros(), the counterpart of the */
+/* init_macros() call made by bt_initialize(). */
+void bt_cleanup (void)
+{
+ done_macros ();
+}
diff --git a/src/translators/btparse/input.c b/src/translators/btparse/input.c
new file mode 100644
index 0000000..dbb7b44
--- /dev/null
+++ b/src/translators/btparse/input.c
@@ -0,0 +1,499 @@
+/* ------------------------------------------------------------------------
+@NAME : input.c
+@DESCRIPTION: Routines for input of BibTeX data.
+@GLOBALS : InputFilename
+ StringOptions
+@CALLS :
+@CREATED : 1997/10/14, Greg Ward (from code in bibparse.c)
+@MODIFIED :
+@VERSION : $Id: input.c,v 1.18 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include <assert.h>
+#include "stdpccts.h"
+#include "lex_auxiliary.h"
+#include "prototypes.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+
+
+/* Name of the input currently being parsed; assigned by the bt_parse_*() */
+/* functions in this file. */
+char * InputFilename;
+/* Default string-processing options for each entry metatype, indexed by */
+/* bt_metatype. Changed with bt_set_stringopts(); OR'ed with the caller's */
+/* options before an entry is passed to bt_postprocess_entry(). */
+ushort StringOptions[NUM_METATYPES] =
+{
+ 0, /* BTE_UNKNOWN */
+ BTO_FULL, /* BTE_REGULAR */
+ BTO_MINIMAL, /* BTE_COMMENT */
+ BTO_MINIMAL, /* BTE_PREAMBLE */
+ BTO_MACRO /* BTE_MACRODEF */
+};
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_set_filename
+@INPUT : filename
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Sets the current input filename -- used for generating
+ error and warning messages.
+@GLOBALS : InputFilename
+@CALLS :
+@CREATED : Feb 1997, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+/* Compiled out: the bt_parse_*() functions below take a filename argument */
+/* and assign InputFilename themselves. */
+#if 0
+void bt_set_filename (char *filename)
+{
+ InputFilename = filename;
+}
+#endif
+
+/* ------------------------------------------------------------------------
+@NAME : bt_set_stringopts
+@INPUT : metatype
+ options
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Sets the string-processing options for a particular
+ entry metatype. Used later on by bt_parse_* to determine
+ just how to post-process each particular entry.
+@GLOBALS : StringOptions
+@CREATED : 1997/08/24, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void bt_set_stringopts (bt_metatype metatype, ushort options)
+{
+   /* only metatypes from BTE_REGULAR through BTE_MACRODEF are accepted */
+   if (metatype < BTE_REGULAR || metatype > BTE_MACRODEF)
+      usage_error ("bt_set_stringopts: illegal metatype");
+   /* reject any bit outside the string-processing option mask */
+   if (options & ~BTO_STRINGMASK)
+      usage_error ("bt_set_stringopts: illegal options "
+                   "(must only set string option bits)");
+
+   StringOptions[metatype] = options;
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : start_parse
+@INPUT : infile input stream we'll read from (or NULL if reading
+ from string)
+ instring input string we'll read from (or NULL if reading
+ from stream)
+ line line number of the start of the string (just
+ use 1 if the string is standalone and independent;
+ if it comes from a file, you should supply the
+ line number where it starts for better error
+ messages) (ignored if infile != NULL)
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Prepares things for parsing, in particular initializes the
+ lexical state and lexical buffer, prepares DLG for
+ reading (either from a stream or a string), and reads
+ the first token.
+@GLOBALS :
+@CALLS : initialize_lexer_state()
+ alloc_lex_buffer()
+ zzrdstream() or zzrdstr()
+ zzgettok()
+@CALLERS :
+@CREATED : 1997/06/21, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+start_parse (FILE *infile, char *instring, int line)
+{
+   int from_stream = (infile != NULL);
+   int from_string = (instring != NULL);
+
+   /* both sources given, or neither: the caller got it wrong */
+   if (from_stream == from_string)
+   {
+      internal_error ("start_parse(): exactly one of infile and "
+                      "instring may be non-NULL");
+   }
+
+   /* fresh lexical state, and a lexical buffer if there isn't one yet */
+   initialize_lexer_state ();
+   alloc_lex_buffer (ZZLEXBUFSIZE);
+
+   /* point DLG at the input; only string input takes the caller's line */
+   if (from_stream)
+   {
+      zzrdstream (infile);
+   }
+   else
+   {
+      zzrdstr (instring);
+      zzline = line;
+   }
+
+   /* reset the column counters and read the first token */
+   zzbegcol = 0;
+   zzendcol = 0;
+   zzgettok ();
+}
+
+
+
+/* ------------------------------------------------------------------------
+@NAME : finish_parse()
+@INPUT : err_counts - pointer to error count list (which is local to
+ the parsing functions, hence has to be passed in)
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Frees up what was needed to parse a whole file or a sequence
+ of strings: the lexical buffer and the error count list.
+@GLOBALS :
+@CALLS : free_lex_buffer()
+@CALLERS :
+@CREATED : 1997/06/21, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+finish_parse (int **err_counts)
+{
+ /* release the lexical buffer, then the caller's saved error counts; */
+ /* the caller's pointer is reset to NULL so it cannot be reused */
+ free_lex_buffer ();
+ free (*err_counts);
+ *err_counts = NULL;
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : parse_status()
+@INPUT : saved_counts
+@OUTPUT :
+@RETURNS : false if there were serious errors in the recently-parsed input
+ true otherwise (no errors or just warnings)
+@DESCRIPTION: Gets the "error status" bitmap relative to a saved set of
+ error counts and masks of non-serious errors.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/06/21, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static boolean
+parse_status (int *saved_counts)
+{
+   /*
+    * Error classes that do not count as failure: their bits are cleared
+    * from the status bitmap (one bit per error class) before testing it.
+    */
+   ushort ignore_emask =
+      (1<<BTERR_NOTIFY) | (1<<BTERR_CONTENT) | (1<<BTERR_LEXWARN);
+
+   /* anything left after masking is a serious error => report failure */
+   if (bt_error_status (saved_counts) & ~ignore_emask)
+      return 0;
+   return 1;
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME       : bt_parse_entry_s()
+@INPUT      : entry_text - string containing the entire entry to parse,
+                           or NULL meaning we're done, please cleanup
+              filename   - name to associate with the input (stored in
+                           InputFilename)
+              line       - current line number (if that makes any sense)
+                           -- passed to the parser to set zzline, so that
+                           lexical and syntax errors are properly localized
+              options    - standard btparse options bitmap (none of the
+                           string-processing bits may be set)
+@OUTPUT     : *status    - only written if status is non-NULL:
+                           TRUE  after a cleanup call (entry_text == NULL)
+                           FALSE if the parser produced no AST at all
+                           otherwise FALSE if serious errors were seen,
+                           TRUE if not
+                           (A "serious" error is a lexical or syntax error;
+                           "trivial" errors such as warnings and
+                           notifications count as success.)
+@RETURNS    : newly-allocated AST for the entry (on serious errors it
+              covers only as much of the input as could be parsed), or
+              NULL after a cleanup call or when no AST was produced
+@DESCRIPTION: Parses a BibTeX entry contained in a string.
+@GLOBALS    : InputFilename, StringOptions
+@CALLS      : ANTLR
+@CREATED    : 1997/01/18, GPW (from code in bt_parse_entry())
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+AST * bt_parse_entry_s (char * entry_text,
+                        char * filename,
+                        int line,
+                        ushort options,
+                        boolean * status)
+{
+   AST * entry_ast = NULL;
+   static int * err_counts = NULL;
+
+   if (options & BTO_STRINGMASK)        /* any string options set? */
+   {
+      usage_error ("bt_parse_entry_s: illegal options "
+                   "(string options not allowed)");
+   }
+
+   InputFilename = filename;
+   /* snapshot of the error counts; parse_status() is given it below */
+   err_counts = bt_get_error_counts (err_counts);
+
+   if (entry_text == NULL)              /* signal to clean up */
+   {
+      finish_parse (&err_counts);
+      if (status) *status = TRUE;
+      return NULL;
+   }
+
+   zzast_sp = ZZAST_STACKSIZE;          /* workaround apparent pccts bug */
+   start_parse (NULL, entry_text, line);
+
+   entry (&entry_ast);                  /* enter the parser */
+   ++zzasp;                             /* why is this done? */
+
+   if (entry_ast == NULL)               /* can happen with very bad input */
+   {
+      if (status) *status = FALSE;
+      return entry_ast;
+   }
+
+#if DEBUG
+   dump_ast ("bt_parse_entry_s: single entry, after parsing:\n",
+             entry_ast);
+#endif
+   /* per-metatype string options combined with the caller's options */
+   bt_postprocess_entry (entry_ast,
+                         StringOptions[entry_ast->metatype] | options);
+#if DEBUG
+   dump_ast ("bt_parse_entry_s: single entry, after post-processing:\n",
+             entry_ast);
+#endif
+
+   if (status) *status = parse_status (err_counts);
+   return entry_ast;
+
+} /* bt_parse_entry_s () */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_parse_entry()
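+@INPUT : infile - file to read next entry from
+ filename - name of that file (stored in InputFilename)
+ options - standard btparse options bitmap
+@OUTPUT : *status - only written if status is non-NULL: FALSE if the entry
+ had serious errors or no AST could be built, TRUE otherwise
+@RETURNS : AST for the entry; NULL once infile is at end-of-file, or when
+ the parser produced no AST at all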
+@DESCRIPTION: Starts (or continues) parsing from a file.
+@GLOBALS :
+@CALLS :
+@CREATED : Jan 1997, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+AST * bt_parse_entry (FILE * infile,
+ char * filename,
+ ushort options,
+ boolean * status)
+{
+ AST * entry_ast = NULL;
+ static int * err_counts = NULL;
+ static FILE * prev_file = NULL;
+
+ /* prev_file is non-NULL while a file is part-way through being parsed */
+ if (prev_file != NULL && infile != prev_file)
+ {
+ usage_error ("bt_parse_entry: you can't interleave calls "
+ "across different files");
+ }
+
+ if (options & BTO_STRINGMASK) /* any string options set? */
+ {
+ usage_error ("bt_parse_entry: illegal options "
+ "(string options not allowed)");
+ }
+
+ InputFilename = filename;
+ /* snapshot of the error counts; parse_status() is given it below */
+ err_counts = bt_get_error_counts (err_counts);
+
+ /* end of file: clean up once, report success, and hand back NULL */
+ if (feof (infile))
+ {
+ if (prev_file != NULL) /* haven't already done the cleanup */
+ {
+ prev_file = NULL;
+ finish_parse (&err_counts);
+ }
+ else
+ {
+ usage_warning ("bt_parse_entry: second attempt to read past eof");
+ }
+
+ if (status) *status = TRUE;
+ return NULL;
+ }
+
+ /*
+ * Here we do some nasty poking about the innards of PCCTS in order to
+ * enter the parser multiple times on the same input stream. This code
+ * comes from expanding the macro invocation:
+ *
+ * ANTLR (entry (top), infile);
+ *
+ * When LL_K, ZZINF_LOOK, and DEMAND_LOOK are all undefined, this
+ * ultimately expands to
+ *
+ * zzbufsize = ZZLEXBUFSIZE;
+ * {
+ * static char zztoktext[ZZLEXBUFSIZE];
+ * zzlextext = zztoktext;
+ * zzrdstream (f);
+ * zzgettok();
+ * }
+ * entry (top);
+ * ++zzasp;
+ *
+ * (I'm expanding the zzenterANTLR, zzleaveANTLR, and zzPrimateLookAhead
+ * macros, but leaving ZZLEXBUFSIZE -- a simple constant -- alone.)
+ *
+ * There are two problems with this: 1) zztoktext is a statically
+ * allocated buffer, and when it overflows we just ignore further
+ * characters that should belong to that lexeme; and 2) zzrdstream() and
+ * zzgettok() are called every time we enter the parser, which means the
+ * token left over from the previous entry will be discarded when we
+ * parse entries 2 .. N.
+ *
+ * I handle the static buffer problem with alloc_lex_buffer() and
+ * realloc_lex_buffer() (in lex_auxiliary.c), and by rewriting the ZZCOPY
+ * macro to call realloc_lex_buffer() when overflow is detected.
+ *
+ * I handle the extra token-read by hanging on to a static file
+ * pointer, prev_file, between calls to bt_parse_entry() -- when
+ * the program starts it is NULL, and we reset it to NULL on
+ * finishing a file. Thus, any call that is the first on a given
+ * file will allocate the lexical buffer and read the first token;
+ * thereafter, we skip those steps, and free the buffer on reaching
+ * end-of-file. Currently, this method precludes interleaving
+ * calls to bt_parse_entry() on different files -- perhaps I could
+ * fix this with the zz{save,restore}_{antlr,dlg}_state()
+ * functions?
+ */
+
+ zzast_sp = ZZAST_STACKSIZE; /* workaround apparent pccts bug */
+
+#if defined(LL_K) || defined(ZZINF_LOOK) || defined(DEMAND_LOOK)
+# error One of LL_K, ZZINF_LOOK, or DEMAND_LOOK was defined
+#endif
+ if (prev_file == NULL) /* only read from input stream if */
+ { /* starting afresh with a file */
+ start_parse (infile, NULL, 0);
+ prev_file = infile;
+ }
+ assert (prev_file == infile);
+
+ entry (&entry_ast); /* enter the parser */
+ ++zzasp; /* why is this done? */
+
+ /* NOTE(review): this path returns NULL without resetting prev_file or */
+ /* calling finish_parse(), so a caller cannot tell it from end-of-file */
+ /* except through *status. */
+ if (entry_ast == NULL) /* can happen with very bad input */
+ {
+ if (status) *status = FALSE;
+ return entry_ast;
+ }
+
+#if DEBUG
+ dump_ast ("bt_parse_entry(): single entry, after parsing:\n",
+ entry_ast);
+#endif
+ /* per-metatype string options combined with the caller's options */
+ bt_postprocess_entry (entry_ast,
+ StringOptions[entry_ast->metatype] | options);
+#if DEBUG
+ dump_ast ("bt_parse_entry(): single entry, after post-processing:\n",
+ entry_ast);
+#endif
+
+ if (status) *status = parse_status (err_counts);
+ return entry_ast;
+
+} /* bt_parse_entry() */
+
+
+/* ------------------------------------------------------------------------
+@NAME       : bt_parse_file ()
+@INPUT      : filename - name of file to open.  If NULL or "-", we read
+                         from stdin rather than opening a new file.
+              options  - standard btparse options bitmap (none of the
+                         string-processing bits may be set)
+@OUTPUT     : *status  - only written if status is non-NULL:
+                         FALSE if the file could not be opened, if any
+                         entry had serious errors, or if parsing stopped
+                         because no AST could be built;
+                         TRUE if all entries were OK
+@RETURNS    : linked list (via the `right' links) of the ASTs of all
+              entries that parsed without serious errors; NULL if there
+              are none or the file could not be opened
+@DESCRIPTION: Parses an entire BibTeX file, and returns a linked list
+              of ASTs (or, if you like, a forest) for the entries in it.
+              (Any entries with serious errors are freed and omitted
+              from the list.)
+@GLOBALS    : InputFilename
+@CALLS      : bt_parse_entry()
+@CREATED    : 1997/01/18, from process_file() in bibparse.c
+@MODIFIED   :
+@COMMENTS   : This function bears a *striking* resemblance to bibparse.c's
+              process_file().  Eventually, I plan to replace this with
+              a generalized process_file() that takes a function pointer
+              to call for each entry.  Until I decide on the right interface
+              for that, though, I'm sticking with this simpler (but possibly
+              memory-intensive) approach.
+-------------------------------------------------------------------------- */
+AST * bt_parse_file (char * filename,
+                     ushort options,
+                     boolean * status)
+{
+   FILE * infile;
+   AST * entries,
+       * cur_entry,
+       * last;
+   boolean entry_status,
+           overall_status;
+
+   if (options & BTO_STRINGMASK)        /* any string options set? */
+   {
+      usage_error ("bt_parse_file: illegal options "
+                   "(string options not allowed)");
+   }
+
+   /*
+    * If a string was given, and it's *not* "-", then open that filename.
+    * Otherwise just use stdin.
+    */
+
+   if (filename != NULL && strcmp (filename, "-") != 0)
+   {
+      InputFilename = filename;
+      infile = fopen (filename, "r");
+      if (infile == NULL)
+      {
+         perror (filename);
+         if (status) *status = FALSE;   /* don't leave it unset */
+         return NULL;
+      }
+   }
+   else
+   {
+      InputFilename = "(stdin)";
+      infile = stdin;
+   }
+
+   entries = NULL;
+   last = NULL;
+
+#if 1
+   /* explicit loop over entries, one bt_parse_entry() call per entry */
+
+   overall_status = TRUE;               /* assume success */
+   entry_status = TRUE;
+   while ((cur_entry = bt_parse_entry
+           (infile, InputFilename, options, &entry_status)))
+   {
+      overall_status &= entry_status;
+      if (!entry_status)                /* bad entry -- drop it and */
+      {                                 /* try the next one */
+         bt_free_ast (cur_entry);
+         continue;
+      }
+      if (last == NULL)                 /* this is the first entry */
+         entries = cur_entry;
+      else                              /* have already seen one */
+         last->right = cur_entry;
+
+      last = cur_entry;
+   }
+
+   /*
+    * The call that ended the loop returned NULL.  At end-of-file its
+    * status is TRUE, but NULL is also returned (with status FALSE) when
+    * the parser could not build an AST -- don't report that as success.
+    */
+   overall_status &= entry_status;
+
+#else
+   /* let the PCCTS lexer/parser handle everything */
+
+   initialize_lexer_state ();
+   ANTLR (bibfile (top), infile);
+
+#endif
+
+   if (infile != stdin)                 /* stdin isn't ours to close */
+      fclose (infile);
+   InputFilename = NULL;
+   if (status) *status = overall_status;
+   return entries;
+
+} /* bt_parse_file() */
diff --git a/src/translators/btparse/lex_auxiliary.c b/src/translators/btparse/lex_auxiliary.c
new file mode 100644
index 0000000..8fac463
--- /dev/null
+++ b/src/translators/btparse/lex_auxiliary.c
@@ -0,0 +1,939 @@
+/* ------------------------------------------------------------------------
+@NAME : lex_auxiliary.c
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: The code and global variables here have three main purposes:
+ - maintain the lexical buffer (zztoktext, which
+ traditionally with PCCTS is a static array; I have
+ changed things so that it's dynamically allocated and
+ resized on overflow)
+ - keep track of lexical state that's not handled by PCCTS
+ code (like "where are we in terms of BibTeX entries?" or
+ "what are the delimiters for the current entry/string?")
+ - everything called from lexical actions is here, to keep
+ the grammar file itself neat and clean
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : Greg Ward, 1996/07/25-28
+@MODIFIED : Jan 1997
+ Jun 1997
+@VERSION : $Id: lex_auxiliary.c,v 1.31 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <assert.h>
+#include "lex_auxiliary.h"
+#include "stdpccts.h"
+#include "error.h"
+#include "prototypes.h"
+/*#include "my_dmalloc.h"*/
+
+#define DUPE_TEXT 0
+
+extern char * InputFilename; /* from input.c */
+
+/* lexical_warning() and lexical_error(), as called by the lexical actions */
+/* below, come from these two macro invocations (class BTERR_LEXWARN and */
+/* BTERR_LEXERR respectively, with InputFilename and zzline supplied). */
+/* NOTE(review): GEN_PRIVATE_ERRFUNC is defined elsewhere, presumably in */
+/* error.h -- confirm the meaning of the NULL and -1 arguments there. */
+GEN_PRIVATE_ERRFUNC (lexical_warning, (const char * fmt, ...),
+ BTERR_LEXWARN, InputFilename, zzline, NULL, -1, fmt)
+GEN_PRIVATE_ERRFUNC (lexical_error, (const char * fmt, ...),
+ BTERR_LEXERR, InputFilename, zzline, NULL, -1, fmt)
+
+
+
+/* ----------------------------------------------------------------------
+ * Global variables
+ */
+
+/* First, the lexical buffer. This is used elsewhere, so can't be static. */
+/* It is NULL whenever no buffer is allocated: alloc_lex_buffer() and */
+/* free_lex_buffer() both test for that. */
+char * zztoktext = NULL;
+
+/*
+ * Now, the lexical state -- first, stuff that arises from scanning
+ * at top-level and the beginnings of entries;
+ * EntryState:
+ * toplevel when we start scanning a file, or when we are in in_entry
+ * mode and see '}' or ')'
+ * after_at when we are in toplevel mode and see an '@'
+ * after_type when we are in after_at mode and see a name (!= 'comment')
+ * in_comment when we are in after_at mode and see a name (== 'comment')
+ * in_entry when we are in after_type mode and see '{' or '('
+ * EntryOpener:
+ * the character ('(' or '{') which opened the entry currently being
+ * scanned (we use this to make sure that the entry opener and closer
+ * match; if not, we issue a warning)
+ * EntryMetatype: (NB. typedef for bt_metatype is in btparse.h)
+ * classifies entries according to the syntax we will use to parse them;
+ * also winds up (after being changed to a bt_nodetype value) in the
+ * node that roots the entry AST:
+ * comment - anything between () or {}
+ * preamble - a single compound value
+ * string - a list of "name = compound_value" assignments; no key
+ * alias - a single "name = compound_value" assignment (where
+ * the compound value in this case is presumably a
+ * name, rather than a string -- this is not syntactically
+ * checked though)
+ * modify,
+ * entry - a key followed by a list of "name = compound_value"
+ * assignments
+ * JunkCount:
+ * the number of non-whitespace, non-'@' characters seen at toplevel
+ * between two entries (used to print out a warning when we hit
+ * the beginning of entry, to help people catch "old style" implicit
+ * comments)
+ */
+/* All four of these are reset by initialize_lexer_state(). */
+static enum { toplevel, after_at, after_type, in_comment, in_entry }
+ EntryState;
+static char EntryOpener; /* '(' or '{' */
+static bt_metatype
+ EntryMetatype;
+static int JunkCount; /* non-whitespace chars at toplevel */
+
+/*
+ * String state -- these are maintained and used by the functions called
+ * from actions in the string lexer.
+ * BraceDepth:
+ * brace depth within a string; we can only end the current string
+ * when this is zero
+ * ParenDepth:
+ * parenthesis depth within a string; needed for @comment entries
+ * that are paren-delimited (because the comment in that case is
+ * a paren-delimited string)
+ * StringOpener:
+ * similar to EntryOpener, but stronger than merely warning of token
+ * mismatch -- this determines which character ('"' or '}') can
+ * actually end the string
+ * StringStart:
+ * line on which current string started; if we detect an apparent
+ * runaway, this is used to report where the runaway started
+ * ApparentRunaway:
+ * flags if we have already detected (and warned) that the current
+ * string appears to be a runaway, so that we don't warn again
+ * (and again and again and again)
+ * QuoteWarned:
+ * flags if we have already warned about seeing a '"' in a string,
+ * because they tend to come in pairs and one warning per string
+ * is enough
+ *
+ * (See bibtex.g for an explanation of my runaway string detection heuristic.)
+ */
+/* NOTE(review): initialize_lexer_state() does not touch any of these; */
+/* presumably the string-lexer actions set them when a string starts -- */
+/* confirm against those functions. */
+static char StringOpener = '\0'; /* '{' or '"' */
+static int BraceDepth; /* depth of brace-nesting */
+static int ParenDepth; /* depth of parenthesis-nesting */
+static int StringStart = -1; /* start line of current string */
+static int ApparentRunaway; /* current string looks like runaway */
+static int QuoteWarned; /* already warned about " in string? */
+
+
+
+/* ----------------------------------------------------------------------
+ * Miscellaneous functions:
+ * lex_info() (handy for debugging)
+ * zzcr_attr() (called from PCCTS-generated code)
+ */
+
+/* Debugging aid: prints the text, token number and token name of the */
+/* lookahead token -- and of the second one when LL_K is defined. */
+void lex_info (void)
+{
+ printf ("LA(1) = \"%s\" token %d, %s\n", LATEXT(1), LA(1), zztokens[LA(1)]);
+#ifdef LL_K
+ printf ("LA(2) = \"%s\" token %d, %s\n", LATEXT(2), LA(2), zztokens[LA(2)]);
+#endif
+}
+
+
+/*
+ * zzcr_attr()
+ *
+ * Fills in the attribute `a' for a token: its text, token number, line
+ * (zzline) and offset (zzbegcol).  For STRING tokens the surrounding
+ * delimiters ({...} or "...") are stripped first -- the closing one is
+ * overwritten in place in `txt', the opening one is skipped.
+ *
+ * Unless DUPE_TEXT is true, a->text points into `txt' rather than at a
+ * copy of it.
+ */
+void zzcr_attr (Attrib *a, int tok, char *txt)
+{
+   if (tok == STRING)
+   {
+      size_t len = strlen (txt);
+
+      /* need both delimiters, or txt[len-1] below would be out of bounds */
+      assert (len >= 2
+              && ((txt[0] == '{' && txt[len-1] == '}')
+                  || (txt[0] == '"' && txt[len-1] == '"')));
+      txt[len-1] = (char) 0;            /* remove closing quote from string */
+      txt++;                            /* so we'll skip the opening quote */
+   }
+
+#if DUPE_TEXT
+   a->text = strdup (txt);
+#else
+   a->text = txt;
+#endif
+   a->token = tok;
+   a->line = zzline;
+   a->offset = zzbegcol;
+#if DEBUG > 1
+   dprintf ("zzcr_attr: input txt = %p (%s)\n", txt, txt);
+   dprintf (" dupe txt = %p (%s)\n", a->text, a->text);
+#endif
+}
+
+
+/* Only compiled when DUPE_TEXT is true, i.e. when zzcr_attr() strdup()s */
+/* the token text: frees that copy. */
+#if DUPE_TEXT
+void zzd_attr (Attrib *attr)
+{
+ free (attr->text);
+}
+#endif
+
+
+/* ----------------------------------------------------------------------
+ * Lexical buffer functions:
+ * alloc_lex_buffer()
+ * realloc_lex_buffer()
+ * free_lex_buffer()
+ * lexer_overflow()
+ * zzcopy() (only if ZZCOPY_FUNCTION is defined and true)
+ */
+
+
+/*
+ * alloc_lex_buffer()
+ *
+ * allocates the lexical buffer with `size' characters. Clears the buffer,
+ * points zzlextext at it, and sets zzbufsize to `size'.
+ *
+ * Does nothing if the buffer is already allocated.
+ *
+ * globals: zztoktext, zzlextext, zzbufsize
+ * callers: start_parse() (in input.c)
+ */
+void alloc_lex_buffer (int size)
+{
+   /* a buffer from an earlier call is still around: leave it alone */
+   if (zztoktext != NULL)
+      return;
+
+   /* calloc() hands back zeroed memory, so no separate clearing pass */
+   zztoktext = (char *) calloc (size, sizeof (char));
+   if (zztoktext == NULL)
+   {
+      internal_error ("out of memory allocating lexical buffer");
+      return;
+   }
+   zzlextext = zztoktext;
+   zzbufsize = size;
+} /* alloc_lex_buffer() */
+
+
+/*
+ * realloc_lex_buffer()
+ *
+ * Reallocates the lexical buffer -- size is increased by `size_increment'
+ * characters (which could be negative). Updates all globals that point
+ * to or into the buffer (zzlextext, zzbegexpr, zzendexpr), as well as
+ * zztoktext (the buffer itself) and zzbufsize (the buffer size).
+ *
+ * This is only meant to be called (ultimately) from zzgettok(), part of
+ * the DLG code. (In fact, zzgettok() invokes the ZZCOPY() macro, which
+ * calls lexer_overflow() on buffer overflow, which calls
+ * realloc_lex_buffer(). Whatever.) The `lastpos' and `nextpos' arguments
+ * correspond, respectively, to a local variable in zzgettok() and a static
+ * global in dlgauto.h (hence really in scan.c). They both point into
+ * the lexical buffer, so have to be passed by reference here so that
+ * we can update them to point into the newly-reallocated buffer.
+ *
+ * globals: zztoktext, zzbufsize, zzlextext, zzbegexpr, zzendexpr
+ * callers: lexer_overflow()
+ */
+static void
+realloc_lex_buffer (int size_increment,
+                    unsigned char ** lastpos,
+                    unsigned char ** nextpos)
+{
+   int beg, end, next;
+   char * new_buf;
+
+   if (zztoktext == NULL)
+      internal_error ("attempt to reallocate unallocated lexical buffer");
+
+   /*
+    * Turn every pointer into the buffer into an offset *before* calling
+    * realloc(): afterwards the old block may be gone, and pointers into
+    * it must not be used any more, not even for arithmetic.
+    */
+   beg = zzbegexpr - zzlextext;
+   end = zzendexpr - zzlextext;
+   next = *nextpos - zzlextext;
+
+   /* go through a temporary so a failed realloc() doesn't lose the buffer */
+   new_buf = (char *) realloc (zztoktext, zzbufsize+size_increment);
+   if (new_buf == NULL)
+   {
+      internal_error ("out of memory reallocating lexical buffer");
+      return;
+   }
+   zztoktext = new_buf;
+
+   /* clear the newly added tail (there is none if the buffer shrank) */
+   if (size_increment > 0)
+      memset (zztoktext+zzbufsize, 0, size_increment);
+   zzbufsize += size_increment;
+
+   /* re-anchor everything that pointed into the old buffer */
+   zzlextext = zztoktext;
+   if (lastpos != NULL)
+      *lastpos = zzlextext+zzbufsize-1;
+   zzbegexpr = zzlextext + beg;
+   zzendexpr = zzlextext + end;
+   *nextpos = zzlextext + next;
+
+} /* realloc_lex_buffer() */
+
+
+/*
+ * free_lex_buffer()
+ *
+ * Frees the lexical buffer allocated by alloc_lex_buffer().
+ */
+void free_lex_buffer (void)
+{
+ /* freeing when nothing is allocated is treated as an internal error */
+ if (zztoktext == NULL)
+ internal_error ("attempt to free unallocated (or already freed) "
+ "lexical buffer");
+
+ free (zztoktext);
+ /* NULL marks "no buffer", so alloc_lex_buffer() will allocate afresh */
+ zztoktext = NULL;
+} /* free_lex_buffer() */
+
+
+/*
+ * lexer_overflow()
+ *
+ * Prints a warning and calls realloc_lex_buffer() to increase the size
+ * of the lexical buffer by ZZLEXBUFSIZE (a constant -- hence the buffer
+ * size increases linearly, not exponentially).
+ *
+ * Also prints a couple of lines of useful debugging stuff if DEBUG is true.
+ */
+void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos)
+{
+#if DEBUG
+   char head[16], tail[16];
+
+   /* show the first and last 15 characters of the first ZZLEXBUFSIZE */
+   /* bytes of the buffer, plus where the lexer currently is */
+   printf ("zzcopy: overflow detected\n");
+   printf (" zzbegcol=%d, zzendcol=%d, zzline=%d\n",
+           zzbegcol, zzendcol, zzline);
+   strncpy (head, zzlextext, 15); head[15] = 0;
+   strncpy (tail, zzlextext+ZZLEXBUFSIZE-15, 15); tail[15] = 0;
+   printf (" zzlextext=>%s...%s< (last char=%d (%c))\n",
+           head, tail,
+           zzlextext[ZZLEXBUFSIZE-1], zzlextext[ZZLEXBUFSIZE-1]);
+   /* a pointer difference is a ptrdiff_t; %d wants an int */
+   printf (" zzchar = %d (%c), zzbegexpr=zzlextext+%d\n",
+           zzchar, zzchar, (int) (zzbegexpr-zzlextext));
+#endif
+
+   /* report the new size, then grow the buffer by one more ZZLEXBUFSIZE */
+   notify ("lexical buffer overflowed (reallocating to %d bytes)",
+           zzbufsize+ZZLEXBUFSIZE);
+   realloc_lex_buffer (ZZLEXBUFSIZE, lastpos, nextpos);
+
+} /* lexer_overflow () */
+
+
+#if ZZCOPY_FUNCTION
+/*
+ * zzcopy()
+ *
+ * Does the same as the ZZCOPY macro (in lex_auxiliary.h), but as a
+ * function for easier debugging.
+ *
+ * Appends zzchar at *nextpos and advances *nextpos, first growing the
+ * buffer through lexer_overflow() if *nextpos has reached *lastpos.
+ * ovf_flag is not used.
+ *
+ * NOTE(review): nextpos/lastpos are char ** here but lexer_overflow()
+ * takes unsigned char ** -- check this still compiles cleanly before
+ * turning ZZCOPY_FUNCTION on.
+ */
+void zzcopy (char **nextpos, char **lastpos, int *ovf_flag)
+{
+ if (*nextpos >= *lastpos)
+ {
+ lexer_overflow (lastpos, nextpos);
+ }
+
+ **nextpos = zzchar;
+ (*nextpos)++;
+}
+#endif
+
+
+
+/* ----------------------------------------------------------------------
+ * Report/maintain lexical state
+ * report_state() (only meaningful if DEBUG)
+ * initialize_lexer_state()
+ *
+ * Note that the lexical action functions, below, also fiddle with
+ * the lexical state variables an awful lot.
+ */
+
+#if DEBUG
+/* printable names for the EntryState values, in declaration order */
+char *state_names[] =
+ { "toplevel", "after_at", "after_type", "in_comment", "in_entry" };
+/* NOTE(review): not referenced in the code visible here, and the names */
+/* do not obviously match the BTE_* metatypes -- confirm before using it */
+/* to index by bt_metatype. */
+char *metatype_names[] =
+ { "unknown", "comment", "preamble", "string", "alias", "modify", "entry" };
+
+/* Trace helper: prints the caller-supplied label, the current lexical */
+/* text with its line and offset, the NLA token, and EntryState by name. */
+static void
+report_state (char *where)
+{
+ printf ("%s: lextext=%s (line %d, offset %d), token=%d, "
+ "EntryState=%s\n",
+ where, zzlextext, zzline, zzbegcol, NLA,
+ state_names[EntryState]);
+}
+#else
+/* without DEBUG, calls to report_state() expand to nothing */
+# define report_state(where)
+/*
+static void
+report_state (char *where) { }
+*/
+#endif
+
+void initialize_lexer_state (void)
+{
+   /* put the scanner back into its START mode ... */
+   zzmode (START);
+
+   /* ... and forget everything known about the entry being scanned */
+   JunkCount     = 0;
+   EntryMetatype = BTE_UNKNOWN;
+   EntryOpener   = '\0';
+   EntryState    = toplevel;
+}
+
+
+bt_metatype entry_metatype (void) /* read-only accessor for the EntryMetatype state variable */
+{
+ return EntryMetatype; /* set by name(); BTE_UNKNOWN after initialize_lexer_state() */
+}
+
+
+
+/* ----------------------------------------------------------------------
+ * Lexical actions (START and LEX_ENTRY modes)
+ */
+
+/*
+ * newline ()
+ *
+ * Does everything needed to handle newline outside of a quoted string:
+ * increments line counter and skips the newline.
+ */
+void newline (void)
+{
+   /* one more input line consumed; the newline itself produces no token */
+   zzline += 1;
+   zzskip();
+}
+
+
+void comment (void)
+{
+   /* same bookkeeping as newline(): bump the line counter, skip the text */
+   zzline += 1;
+   zzskip();
+}
+
+
+void at_sign (void)
+{
+   if (EntryState != toplevel)
+   {
+      /* "@" anywhere but top level: just warn and carry on */
+      /* internal_error ("lexer recognized \"@\" at other than top-level"); */
+      lexical_warning ("\"@\" in strange place -- should get syntax error");
+   }
+   else
+   {
+      /* start of an entry: switch to entry mode */
+      EntryState = after_at;
+      zzmode (LEX_ENTRY);
+
+      /* report (once) whatever junk accumulated before this "@" */
+      if (JunkCount > 0)
+      {
+         lexical_warning ("%d characters of junk seen at toplevel", JunkCount);
+         JunkCount = 0;
+      }
+   }
+
+   report_state ("at_sign");
+}
+
+
+void toplevel_junk (void)
+{
+   /* tally the junk so at_sign() can report it, then discard the text */
+   JunkCount = JunkCount + strlen (zzlextext);
+   zzskip ();
+}
+
+
+void name (void)
+{
+   report_state ("name (pre)");
+
+   if (EntryState == toplevel)
+   {
+      internal_error ("junk at toplevel (\"%s\")", zzlextext);
+   }
+   else if (EntryState == after_at)
+   {
+      /* the name right after "@" is the entry type: classify it */
+      char * etype = zzlextext;
+
+      if (strcasecmp (etype, "comment") == 0)
+      {
+         EntryMetatype = BTE_COMMENT;
+         EntryState = in_comment;
+      }
+      else
+      {
+         EntryState = after_type;
+
+         /* "alias" and "modify" are not recognised; they count as regular */
+         if (strcasecmp (etype, "preamble") == 0)
+            EntryMetatype = BTE_PREAMBLE;
+         else if (strcasecmp (etype, "string") == 0)
+            EntryMetatype = BTE_MACRODEF;
+         else
+            EntryMetatype = BTE_REGULAR;
+      }
+   }
+   /* after_type, in_comment, in_entry: a name needs no state change */
+
+   report_state ("name (post)");
+
+}
+
+
+void lbrace (void)
+{
+   /*
+    * A "{" opens an entry only directly after "@name" (state after_type;
+    * "@comment" never gets there because name() moves it to in_comment).
+    * Inside an entry or a comment it starts a string.  Anywhere else it is
+    * merely warned about.
+    */
+
+   switch (EntryState)
+   {
+      case in_entry:
+      case in_comment:
+         start_string ('{');
+         break;
+
+      case after_type:
+         EntryState = in_entry;
+         EntryOpener = '{';
+         NLA = ENTRY_OPEN;
+         break;
+
+      default:
+         lexical_warning ("\"{\" in strange place -- should get a syntax error");
+         break;
+   }
+
+   report_state ("lbrace");
+}
+
+
+void rbrace (void)
+{
+   if (EntryState != in_entry)
+   {
+      lexical_warning ("\"}\" in strange place -- should get a syntax error");
+   }
+   else
+   {
+      /* closes the entry; complain if it was opened with the other delimiter */
+      if (EntryOpener == '(')
+         lexical_warning ("entry started with \"(\", but ends with \"}\"");
+      NLA = ENTRY_CLOSE;
+      initialize_lexer_state ();
+   }
+
+   report_state ("rbrace");
+}
+
+
+void lparen (void)
+{
+   switch (EntryState)
+   {
+      case in_comment:                  /* body of a comment entry */
+         start_string ('(');
+         break;
+
+      case after_type:                  /* "@name(" opens an entry */
+         EntryState = in_entry;
+         EntryOpener = '(';
+         break;
+
+      default:
+         lexical_warning ("\"(\" in strange place -- should get a syntax error");
+         break;
+   }
+
+   report_state ("lparen");
+}
+
+
+void rparen (void)
+{
+   if (EntryState != in_entry)
+   {
+      lexical_warning ("\")\" in strange place -- should get a syntax error");
+   }
+   else
+   {
+      /* closes the entry; complain if it was opened with the other delimiter */
+      if (EntryOpener == '{')
+         lexical_warning ("entry started with \"{\", but ends with \")\"");
+      initialize_lexer_state ();
+   }
+
+   report_state ("rparen");
+}
+
+
+/* ----------------------------------------------------------------------
+ * Stuff for processing strings.
+ */
+
+
+/*
+ * start_string ()
+ *
+ * Called when we see a '{' or '"' in the field data. Records which quote
+ * character was used, and calls open_brace() to increment the depth
+ * counter if it was a '{'. Switches to LEX_STRING mode, and tells the
+ * lexer to continue slurping characters into the same buffer.
+ */
+void start_string (char start_char)
+{
+   /* fresh bookkeeping for the string that is about to be scanned */
+   StringOpener = start_char;
+   BraceDepth = 0;
+   ParenDepth = 0;
+   StringStart = zzline;
+   ApparentRunaway = 0;
+   QuoteWarned = 0;
+
+   switch (start_char)
+   {
+      case '{':
+         open_brace ();                 /* counts the opening brace */
+         break;
+
+      case '(':
+         ParenDepth++;
+         break;
+
+      case '"':
+         /* a comment entry cannot be quote-delimited: give up on it */
+         if (EntryState == in_comment)
+         {
+            lexical_error ("comment entries must be delimited by either braces or parentheses");
+            EntryState = toplevel;
+            zzmode (START);
+            return;
+         }
+         break;
+   }
+
+#ifdef USER_ZZMODE_STACK
+   if (zzauto != LEX_ENTRY || EntryState != in_entry)
+#else
+   if (EntryState != in_entry && EntryState != in_comment)
+#endif
+   {
+      lexical_warning ("start of string seen at weird place");
+   }
+
+   /* keep accumulating into the same buffer, now in string mode */
+   zzmore ();
+   zzmode (LEX_STRING);
+}
+
+
+/*
+ * end_string ()
+ *
+ * Called when we see either a '"' (at depth 0) or '}' (if it brings us
+ * down to depth 0) in a quoted string. Just makes sure that braces are
+ * balanced, and then goes back to the LEX_ENTRY mode (or to the START
+ * mode when the string was the body of a comment entry).
+ */
+void end_string (char end_char)
+{
+   char match;
+
+#ifndef ALLOW_WARNINGS
+   match = (char) 0;                    /* silence "might be used */
+                                        /* uninitialized" warning */
+#endif
+
+   /* which opening delimiter pairs with `end_char'? */
+   if (end_char == '}')
+      match = '{';
+   else if (end_char == ')')
+      match = '(';
+   else if (end_char == '"')
+      match = '"';
+   else
+      internal_error ("end_string(): invalid end_char \"%c\"", end_char);
+
+   assert (StringOpener == match);
+
+   /*
+    * Braces still open at the end of the string suggest a mismatch
+    * somewhere: report it and reset the depth so later strings start clean.
+    */
+   if (BraceDepth > 0)
+   {
+      lexical_error ("unbalanced braces: too many {'s");
+      BraceDepth = 0;
+   }
+
+   StringOpener = (char) 0;
+   StringStart = -1;
+   NLA = STRING;
+
+   if (EntryState != in_comment)
+   {
+      /* an ordinary field value: carry on scanning the entry */
+      zzmode (LEX_ENTRY);
+   }
+   else
+   {
+      /*
+       * The string was the whole body of a comment entry.  A body
+       * delimited by parentheses is rewritten in place to use braces,
+       * then the lexer returns to top level.
+       */
+      int len = strlen (zzlextext);
+
+      if (zzlextext[0] == '(')
+      {
+         zzlextext[0] = '{';
+         zzlextext[len - 1] = '}';
+      }
+
+      EntryState = toplevel;
+      zzmode (START);
+   }
+
+   report_state ("string");
+}
+
+
+/*
+ * open_brace ()
+ *
+ * Called when we see a '{', either to start a string (in which case
+ * it's called from start_string()) or inside a string (called directly
+ * from the lexer).
+ */
+void open_brace (void)
+{
+   /* one level deeper; keep collecting characters into the same token */
+   BraceDepth += 1;
+   zzmore ();
+   report_state ("open_brace");
+}
+
+
+/*
+ * close_brace ()
+ *
+ * Called when we see a '}' inside a string. Decrements the depth counter
+ * and checks to see if we are down to depth 0, in which case the string is
+ * ended and the current lookahead token is set to STRING. Otherwise,
+ * just tells the lexer to keep slurping characters into the buffer.
+ */
+void close_brace (void)
+{
+   BraceDepth--;
+
+   if (BraceDepth < 0)
+   {
+      /*
+       * An unmatched right brace, e.g. in a quote-delimited string such as
+       * "Hello}".  Recover by resetting the depth and continuing to scan.
+       */
+      lexical_error ("unbalanced braces: too many }'s");
+      BraceDepth = 0;
+      zzmore ();
+   }
+   else if (BraceDepth == 0 && StringOpener == '{')
+   {
+      /* this brace balances the one that opened the string */
+      end_string ('}');
+   }
+   else
+   {
+      /* just another right brace inside the string: keep going */
+      zzmore ();
+   }
+
+   report_state ("close_brace");
+}
+
+
+void lparen_in_string (void)
+{
+   /* track nesting so rparen_in_string() can spot the closing ')' */
+   ParenDepth += 1;
+   zzmore ();
+}
+
+
+void rparen_in_string (void)
+{
+   ParenDepth--;
+
+   /* anything but the ')' balancing a '('-opened string is ordinary text */
+   if (StringOpener != '(' || ParenDepth != 0)
+   {
+      zzmore ();
+      return;
+   }
+
+   end_string (')');
+}
+
+
+/*
+ * quote_in_string ()
+ *
+ * Called when we see '"' in a string. Ends the string if the quote is at
+ * depth 0 and the string was started with a quote, otherwise instructs the
+ * lexer to continue munching happily along. (Also prints a warning,
+ * assuming that input is destined for processing by TeX and you really
+ * want either `` or '' rather than ".)
+ */
+void quote_in_string (void)
+{
+   boolean at_top = FALSE;
+
+   /* a quote at depth 0 of a quote-delimited string terminates it */
+   if (StringOpener == '"' && BraceDepth == 0)
+   {
+      end_string ('"');
+      return;
+   }
+
+   /*
+    * Otherwise the quote is part of the text.  "Top level" means brace
+    * depth 0, or depth 1 when the string itself was opened with a brace.
+    * (The warning below assumes the text is destined for TeX, so it
+    * really ought to be optional.)
+    */
+   switch (StringOpener)
+   {
+      case '"':
+      case '(':
+         at_top = (BraceDepth == 0);
+         break;
+      case '{':
+         at_top = (BraceDepth == 1);
+         break;
+      default:
+         internal_error ("Illegal string opener \"%c\"", StringOpener);
+   }
+
+   /* warn at most once per string */
+   if (at_top && !QuoteWarned)
+   {
+      lexical_warning ("found \" at brace-depth zero in string "
+                       "(TeX accents in BibTeX should be inside braces)");
+      QuoteWarned = 1;
+   }
+
+   zzmore ();
+}
+
+
+/*
+ * check_runaway_string ()
+ *
+ * Called from the lexer whenever we see a newline in a string. See
+ * bibtex.g for a detailed explanation; basically, this function
+ * looks for an entry start ("@name{") or new field ("name=") immediately
+ * after a newline (with possible whitespace). This is a heuristic
+ * check for runaway strings, under the assumption that text that looks
+ * like a new entry or new field won't actually occur inside a string
+ * very often.
+ */
+void check_runaway_string (void)
+{
+   int len;
+   int i;
+
+   /*
+    * could these be made significantly more efficient by a 256-element
+    * lookup table instead of calling strchr()?
+    */
+   static const char *alpha_chars = "abcdefghijklmnopqrstuvwxyz";
+   static const char *name_chars = "abcdefghijklmnopqrstuvwxyz0123456789:+/'.-";
+
+   /*
+    * on entry: zzlextext contains the whole string, starting with {
+    * and with newlines/tabs converted to space; zzbegexpr points to
+    * a chunk of the string starting with newline (newlines and
+    * tabs have not yet been converted)
+    */
+
+#if DEBUG > 1
+   printf ("check_runaway_string(): zzline=%d\n", zzline);
+   printf ("zzlextext=>%s<\nzzbegexpr=>%s<\n",
+           zzlextext, zzbegexpr);
+#endif
+
+   /*
+    * increment zzline to take the leading newline into account -- but
+    * first a sanity check to be sure that newline is there!
+    */
+
+   if (zzbegexpr[0] != '\n')
+   {
+      lexical_warning ("huh? something's wrong (buffer overflow?) near "
+                       "offset %d (line %d)", zzendcol, zzline);
+   }
+   else
+   {
+      zzline++;
+   }
+
+   /*
+    * standardize whitespace (convert all to space); the <ctype.h>
+    * functions require a value representable as unsigned char, so the
+    * character is converted before the call
+    */
+
+   len = strlen (zzbegexpr);
+   for (i = 0; i < len; i++)
+   {
+      if (isspace ((unsigned char) zzbegexpr[i]))
+         zzbegexpr[i] = ' ';
+   }
+
+   if (!ApparentRunaway)                /* haven't already warned about it */
+   {
+      enum { none, entry, field, giveup } guess;
+
+      /*
+       * Skip the first character (normally the newline just checked) and
+       * any blanks after it.  With an empty chunk there is nothing to
+       * skip, and starting at 1 would index past the terminator.
+       */
+      i = (len > 0) ? 1 : 0;
+      guess = none;
+      while (i < len && zzbegexpr[i] == ' ') i++;
+
+      /* "@" followed by optional blanks: might be a new entry */
+      if (i < len && zzbegexpr[i] == '@')
+      {
+         i++;
+         while (i < len && zzbegexpr[i] == ' ') i++;
+         guess = entry;
+      }
+
+      /*
+       * A name must start with a letter.  The explicit `i < len' matters:
+       * strchr() treats the terminating NUL as part of the string, so a
+       * lookup of the end-of-text NUL would otherwise "match".
+       */
+      if (i < len &&
+          strchr (alpha_chars, tolower ((unsigned char) zzbegexpr[i])) != NULL)
+      {
+         while (i < len &&
+                strchr (name_chars, tolower ((unsigned char) zzbegexpr[i])) != NULL)
+            i++;
+         while (i < len && zzbegexpr[i] == ' ') i++;
+
+         if (i == len)
+         {
+            guess = giveup;             /* name runs to the end of the chunk */
+         }
+         else if (guess == entry)
+         {
+            /* "@name" must be followed by an entry opener */
+            if (zzbegexpr[i] != '{' && zzbegexpr[i] != '(')
+               guess = giveup;
+         }
+         else                           /* assume it's a field */
+         {
+            if (zzbegexpr[i] == '=')
+               guess = field;
+            else
+               guess = giveup;
+         }
+      }
+      else                              /* no name seen after WS or @ */
+      {
+         guess = giveup;
+      }
+
+      if (guess == none)
+         internal_error ("gee, I should have made a guess by now");
+
+      if (guess != giveup)
+      {
+         lexical_warning ("possible runaway string started at line %d",
+                          StringStart);
+         ApparentRunaway = 1;
+      }
+   }
+
+   zzmore();
+}
+
diff --git a/src/translators/btparse/lex_auxiliary.h b/src/translators/btparse/lex_auxiliary.h
new file mode 100644
index 0000000..ebbf053
--- /dev/null
+++ b/src/translators/btparse/lex_auxiliary.h
@@ -0,0 +1,71 @@
+/* ------------------------------------------------------------------------
+@NAME : lex_auxiliary.h
+@DESCRIPTION: Macros and function prototypes needed by the lexical scanner.
+ Some of these are called from internal PCCTS code, and some
+ are explicitly called from the lexer actions in bibtex.g.
+@CREATED : Summer 1996, Greg Ward
+@MODIFIED :
+@VERSION : $Id: lex_auxiliary.h,v 1.15 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+#ifndef LEX_AUXILIARY_H
+#define LEX_AUXILIARY_H
+
+#include "btparse.h"
+#include "attrib.h"
+
+#define ZZCOPY_FUNCTION 0 /* 0: ZZCOPY expands inline; non-zero: ZZCOPY calls zzcopy() */
+
+#if ZZCOPY_FUNCTION /* function form, described in lex_auxiliary.c as easier to debug */
+#define ZZCOPY zzcopy (&zznextpos, &lastpos, &zzbufovf)
+#else /* inline form: grow the buffer via lexer_overflow() when full, then store zzchar */
+#define ZZCOPY \
+ if (zznextpos >= lastpos) \
+ { \
+ lexer_overflow (&lastpos, &zznextpos); \
+ } \
+ *(zznextpos++) = zzchar;
+#endif /* NOTE(review): the inline ZZCOPY is two statements with its own ';', not a do{}while(0) -- confirm every use site allows a statement list */
+
+
+/* Function prototypes: */
+
+void lex_info (void);
+void zzcr_attr (Attrib *a, int tok, char *txt);
+
+void alloc_lex_buffer (int size);
+void free_lex_buffer (void); /* frees zztoktext and sets it to NULL */
+void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos); /* grows the lexical buffer by ZZLEXBUFSIZE */
+#if ZZCOPY_FUNCTION
+void zzcopy (char **nextpos, char **lastpos, int *ovf_flag); /* function form of ZZCOPY */
+#endif
+
+void initialize_lexer_state (void); /* back to START mode, entry state cleared */
+bt_metatype entry_metatype (void); /* returns the current EntryMetatype */
+
+void newline (void); /* lexical actions outside strings */
+void comment (void);
+void at_sign (void);
+void toplevel_junk (void);
+void name (void);
+void lbrace (void);
+void rbrace (void);
+void lparen (void);
+void rparen (void);
+
+void start_string (char start_char); /* string handling: start_char is '{', '(' or '"' */
+void end_string (char end_char); /* end_char is '}', ')' or '"' */
+void open_brace (void);
+void close_brace (void);
+void lparen_in_string (void);
+void rparen_in_string (void);
+void quote_in_string (void);
+void check_runaway_string (void); /* heuristic warning, run on a newline inside a string */
+
+#endif /* ! defined LEX_AUXILIARY_H */
diff --git a/src/translators/btparse/macros.c b/src/translators/btparse/macros.c
new file mode 100644
index 0000000..06db983
--- /dev/null
+++ b/src/translators/btparse/macros.c
@@ -0,0 +1,367 @@
+/* ------------------------------------------------------------------------
+@NAME : macros.c
+@DESCRIPTION: Front-end to the standard PCCTS symbol table code (sym.c)
+ to abstract my "macro table".
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/12, Greg Ward
+@MODIFIED :
+@VERSION : $Id: macros.c,v 1.19 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <string.h>
+#include "sym.h"
+#include "prototypes.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+#include "bt_debug.h"
+
+
+/*
+ * NUM_MACROS and STRING_SIZE define the size of the static data
+ * structure that holds the macro table. The defaults are to allocate
+ * 4096 bytes of string space that will be divided up amongst 547
+ * macros. This should be fine for most applications, but if you have a
+ * big macro table you might need to change these and recompile (don't
+ * forget to rebuild and reinstall Text::BibTeX if you're using it!).
+ * You can set these as high as you like; just remember that a block of
+ * STRING_SIZE bytes will be allocated and not freed as long as you're
+ * using btparse. Also, NUM_MACROS defines the size of a hashtable, so
+ * it should probably be a prime a bit greater than a power of 2 -- or
+ * something like that. I'm not sure of the exact Knuthian
+ * specification.
+ */
+#define NUM_MACROS 547 /* first argument to zzs_init() in init_macros() */
+#define STRING_SIZE 4096 /* second argument to zzs_init() in init_macros() */
+
+Sym *AllMacros = NULL; /* `scope' so we can get back list */
+ /* of all macros when done */
+
+
+GEN_PRIVATE_ERRFUNC (macro_warning,
+ (char * filename, int line, const char * fmt, ...),
+ BTERR_CONTENT, filename, line, NULL, -1, fmt) /* NOTE(review): presumably defines a file-private macro_warning() with the listed signature; GEN_PRIVATE_ERRFUNC is declared elsewhere -- confirm */
+
+
+/* ------------------------------------------------------------------------
+@NAME : init_macros()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Initializes the symbol table used to store macro values.
+@GLOBALS : AllMacros
+@CALLS : zzs_init(), zzs_scope() (sym.c)
+@CALLERS : bt_initialize() (init.c)
+@CREATED : Jan 1997, GPW
+-------------------------------------------------------------------------- */
+void
+init_macros (void)
+{
+ zzs_init (NUM_MACROS, STRING_SIZE); /* set up the symbol table with the sizes #defined in this file */
+ zzs_scope (&AllMacros); /* make AllMacros the scope, so new macros are linked onto it */
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : done_macros()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Frees up all the macro values in the symbol table, and
+ then frees up the symbol table itself.
+@GLOBALS : AllMacros
+@CALLS : zzs_rmscope(), zzs_done()
+@CALLERS : bt_cleanup() (init.c)
+@CREATED : Jan 1997, GPW
+-------------------------------------------------------------------------- */
+void
+done_macros (void)
+{
+ bt_delete_all_macros (); /* free every macro's text and record first */
+ zzs_done (); /* then release the symbol table itself (per the header comment) */
+}
+
+
+static void
+delete_macro_entry (Sym * sym)
+{
+   Sym * node;
+   Sym * before = NULL;
+
+   /*
+    * The `scope' list has to be maintained by hand here.  Walk it from
+    * AllMacros until `sym' is found, remembering the predecessor.
+    */
+   for (node = AllMacros; node != NULL && node != sym; node = node->scope)
+      before = node;
+
+   if (node == NULL)                    /* fell off the end: not in the list */
+   {
+      internal_error ("macro table entry for \"%s\" not found in scope list",
+                      sym->symbol);
+   }
+
+   /* splice the entry out of the scope list */
+   if (before != NULL)
+      before->scope = node->scope;
+   else                                 /* it was the head of the list */
+      AllMacros = node->scope;
+
+   /* drop it from the hash table, then release its text and the record */
+   zzs_del (sym);
+   free (sym->text);
+   free (sym);
+} /* delete_macro_entry() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_add_macro_value()
+@INPUT : assignment - AST node representing "macro = value"
+ options - string-processing options that were used to
+ process this string after parsing
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Adds a value to the symbol table used for macros.
+
+ If the value was not already post-processed as a macro value
+ (expand macros, paste substrings, but don't collapse
+ whitespace), then this post-processing is done before adding
+ the macro text to the table.
+
+ If the macro is already defined, a warning is printed and
+ the old text is overridden.
+@GLOBALS :
+@CALLS : bt_add_macro_text()
+ bt_postprocess_field()
+@CALLERS : bt_postprocess_entry() (post_parse.c)
+@CREATED : Jan 1997, GPW
+-------------------------------------------------------------------------- */
+void
+bt_add_macro_value (AST *assignment, ushort options)
+{
+   AST *   value;
+   char *  text;
+   boolean owns_text;
+
+   /* nothing to define without an assignment node and its value */
+   if (assignment == NULL) return;
+   value = assignment->down;
+   if (value == NULL) return;
+
+   if ((options & BTO_STRINGMASK) == BTO_MACRO)
+   {
+      /*
+       * The value should already have been processed as a macro value;
+       * sanity-check that it was reduced to a single string node and
+       * borrow its text.
+       */
+      if (value->nodetype != BTAST_STRING || value->right != NULL)
+      {
+         internal_error ("add_macro: macro value was not "
+                         "correctly preprocessed");
+      }
+
+      text = value->text;
+      owns_text = FALSE;
+   }
+   else
+   {
+      /*
+       * Processed with some other options: redo it here.  The result is
+       * allocated by bt_postprocess_field(), so it is ours to free.
+       */
+      text = bt_postprocess_field (assignment, BTO_MACRO, FALSE);
+      owns_text = TRUE;
+   }
+
+   /* bt_add_macro_text() stores its own copy of the text */
+   bt_add_macro_text (assignment->text, text,
+                      assignment->filename, assignment->line);
+
+   if (owns_text)
+      free (text);                      /* free(NULL) is a no-op */
+
+} /* bt_add_macro_value() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_add_macro_text()
+@INPUT : macro - the name of the macro to define
+ text - the macro text
+ filename, line - where the macro is defined; pass NULL
+ for filename if no file, 0 for line if no line number
+ (just used to generate warning message)
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Sets the text value for a macro. If the macro is already
+ defined, a warning is printed and the old value is overridden.
+@GLOBALS :
+@CALLS : zzs_get(), zzs_newadd()
+@CALLERS : bt_add_macro_value()
+ (exported from library)
+@CREATED : 1997/11/13, GPW (from code in bt_add_macro_value())
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_add_macro_text (char * macro, char * text, char * filename, int line)
+{
+   Sym * existing;
+   Sym * entry;
+
+#if DEBUG == 1
+   printf ("adding macro \"%s\" = \"%s\"\n", macro, text);
+#elif DEBUG >= 2
+   printf ("add_macro: macro = %p (%s)\n"
+           " text = %p (%s)\n",
+           macro, macro, text, text);
+#endif
+
+   /* a redefinition replaces the old entry, with a warning */
+   existing = zzs_get (macro);
+   if (existing != NULL)
+   {
+      macro_warning (filename, line,
+                     "overriding existing definition of macro \"%s\"",
+                     macro);
+      delete_macro_entry (existing);
+   }
+
+   /* the table keeps a private copy of the text (NULL stays NULL) */
+   entry = zzs_newadd (macro);
+   if (text == NULL)
+      entry->text = NULL;
+   else
+      entry->text = strdup (text);
+
+   DBG_ACTION
+      (2, printf (" saved = %p (%s)\n",
+                  entry->text, entry->text);)
+
+} /* bt_add_macro_text() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_delete_macro()
+@INPUT : macro - name of macro to delete
+@DESCRIPTION: Deletes a macro from the macro table.
+@CALLS : zzs_get()
+@CALLERS :
+@CREATED : 1998/03/01, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_delete_macro (char * macro)
+{
+   Sym * found = zzs_get (macro);
+
+   /* deleting an undefined macro is silently ignored */
+   if (found != NULL)
+      delete_macro_entry (found);
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_delete_all_macros()
+@DESCRIPTION: Deletes all macros from the macro table.
+@CALLS : zzs_rmscope()
+@CALLERS :
+@CREATED : 1998/03/01, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_delete_all_macros (void)
+{
+   Sym *entry;
+   Sym *following;
+
+   DBG_ACTION (2, printf ("bt_delete_all_macros():\n");)
+
+   /*
+    * All macros share one `scope', so zzs_rmscope() hands back a linked
+    * list of every macro record.  Walk it, releasing each record's text
+    * (strdup()'d in bt_add_macro_text()) and then the record itself.  The
+    * successor is saved before the record is freed.
+    */
+
+   for (entry = zzs_rmscope (&AllMacros); entry != NULL; entry = following)
+   {
+      DBG_ACTION
+         (2, printf (" freeing macro \"%s\" (%p=\"%s\") at %p\n",
+                     entry->symbol, entry->text, entry->text, entry);)
+
+      following = entry->scope;
+      free (entry->text);
+      free (entry);
+   }
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_macro_length()
+@INPUT : macro - the macro name
+@OUTPUT :
+@RETURNS : length of the macro's text, or zero if the macro is undefined
+@DESCRIPTION: Returns length of a macro's text.
+@GLOBALS :
+@CALLS : zzs_get()
+@CALLERS : bt_postprocess_value()
+ (exported from library)
+@CREATED : Jan 1997, GPW
+-------------------------------------------------------------------------- */
+int
+bt_macro_length (char *macro)
+{
+   Sym *sym;
+
+   DBG_ACTION
+      (2, printf ("bt_macro_length: looking up \"%s\"\n", macro);)
+
+   /*
+    * Returns the length of the macro's text, or 0 when the macro is
+    * undefined.  bt_add_macro_text() stores a NULL text when it is given
+    * one, so a defined macro may have no text at all; that case also
+    * counts as length 0 rather than being passed to strlen().
+    */
+   sym = zzs_get (macro);
+   if (sym == NULL || sym->text == NULL)
+      return 0;
+
+   return strlen (sym->text);
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_macro_text()
+@INPUT : macro - the macro name
+ filename, line - where the macro was invoked; NULL for
+ `filename' and zero for `line' if not applicable
+@OUTPUT :
+@RETURNS : The text of the macro, or NULL if it's undefined.
+@DESCRIPTION: Fetches a macro's text; prints warning and returns NULL if
+ macro is undefined.
+@CALLS : zzs_get()
+@CALLERS : bt_postprocess_value()
+@CREATED : Jan 1997, GPW
+-------------------------------------------------------------------------- */
+char *
+bt_macro_text (char * macro, char * filename, int line)
+{
+   Sym * found;
+
+   DBG_ACTION
+      (2, printf ("bt_macro_text: looking up \"%s\"\n", macro);)
+
+   found = zzs_get (macro);
+   if (found != NULL)
+      return found->text;               /* the table's own copy, not a duplicate */
+
+   /* unknown macro: warn at the place it was used and report "no text" */
+   macro_warning (filename, line, "undefined macro \"%s\"", macro);
+   return NULL;
+}
diff --git a/src/translators/btparse/mode.h b/src/translators/btparse/mode.h
new file mode 100644
index 0000000..25b36ce
--- /dev/null
+++ b/src/translators/btparse/mode.h
@@ -0,0 +1,3 @@
+#define START 0 /* mode set by initialize_lexer_state() via zzmode() */
+#define LEX_ENTRY 1 /* mode set by at_sign(), and by end_string() after a field value */
+#define LEX_STRING 2 /* mode set by start_string() while a string is scanned */
diff --git a/src/translators/btparse/modify.c b/src/translators/btparse/modify.c
new file mode 100644
index 0000000..2d8d9c1
--- /dev/null
+++ b/src/translators/btparse/modify.c
@@ -0,0 +1,75 @@
+/* ------------------------------------------------------------------------
+@NAME : modify.c
+@DESCRIPTION: Routines for modifying the AST for a single entry.
+@GLOBALS :
+@CALLS :
+@CREATED : 1999/11/25, Greg Ward (based on code supplied by
+ Stéphane Genaud <[email protected]>)
+@MODIFIED :
+@VERSION : $Id: modify.c,v 1.2 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <string.h>
+#include "btparse.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_set_text ()
+@INPUT : node
+ new_text
+@OUTPUT : node->text
+@RETURNS :
+@DESCRIPTION: Replace the text member of an AST node with a new string.
+ The passed in string, 'new_text', is duplicated, so the
+ caller may free it without worry.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1999/11/25, GPW (from Stéphane Genaud)
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void bt_set_text (AST * node, char * new_text)
+{
+   /*
+    * Replace node->text with a private copy of `new_text'.
+    *
+    * The copy is made before the old text is released, so the call stays
+    * valid when `new_text' is (or points into) the node's current text.
+    * A NULL `new_text' clears the text instead of being handed to
+    * strdup().  If the copy cannot be allocated the node keeps its old
+    * text.
+    */
+   char * copy = NULL;
+
+   if (new_text != NULL)
+   {
+      copy = strdup (new_text);
+      if (copy == NULL)                 /* out of memory: leave node as is */
+         return;
+   }
+
+   free (node->text);
+   node->text = copy;
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_entry_set_key ()
+@INPUT : entry
+ new_key
+@OUTPUT : entry->down->text
+@RETURNS :
+@DESCRIPTION: Changes the key of a regular entry to 'new_key'. If 'entry'
+ is not a regular entry, or if it doesn't already have a child
+ node holding an entry key, bombs via 'usage_error()'.
+ Otherwise a duplicate of 'new_key' is copied into the entry
+ AST (so the caller can free that string without worry).
+@CALLS : bt_set_text ()
+@CREATED : 1999/11/25, GPW (from Stéphane Genaud)
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void bt_entry_set_key (AST * entry, char * new_key)
+{
+   /* only a regular entry whose first child is a key node can be re-keyed */
+   if (entry->metatype != BTE_REGULAR ||
+       !entry->down ||
+       entry->down->nodetype != BTAST_KEY)
+   {
+      usage_error ("can't set entry key -- not a regular entry, "
+                   "or doesn't have a key already");
+      return;
+   }
+
+   /* bt_set_text() stores its own duplicate of new_key */
+   bt_set_text (entry->down, new_key);
+}
diff --git a/src/translators/btparse/my_alloca.h b/src/translators/btparse/my_alloca.h
new file mode 100644
index 0000000..0466157
--- /dev/null
+++ b/src/translators/btparse/my_alloca.h
@@ -0,0 +1,35 @@
+/* ------------------------------------------------------------------------
+@NAME : my_alloca.h
+@DESCRIPTION: All-out assault at making alloca() available on any Unix
+ platform. Stolen from the GNU Autoconf manual.
+@CREATED : 1997/10/30, Greg Ward
+@VERSION : $Id: my_alloca.h,v 1.1 1997/10/31 03:56:17 greg Rel $
+@COPYRIGHT : This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+#ifndef MY_ALLOCA_H
+#define MY_ALLOCA_H
+
+#ifdef __GNUC__ /* GCC: map alloca onto the compiler builtin unless already defined */
+# ifndef alloca
+# define alloca __builtin_alloca
+# endif
+#else /* not GCC */
+# if HAVE_ALLOCA_H /* a system <alloca.h> is available */
+# include <alloca.h>
+# else
+# ifdef _AIX /* AIX compilers want a pragma instead */
+# pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca (); /* last resort: old-style declaration without a prototype */
+# endif
+# endif /* _AIX */
+# endif /* HAVE_ALLOCA_H */
+#endif /* __GNUC__ */
+
+#endif /* MY_ALLOCA_H */
diff --git a/src/translators/btparse/names.c b/src/translators/btparse/names.c
new file mode 100644
index 0000000..11c4bfd
--- /dev/null
+++ b/src/translators/btparse/names.c
@@ -0,0 +1,915 @@
+/* ------------------------------------------------------------------------
+@NAME : names.c
+@DESCRIPTION: Functions for dealing with BibTeX names and lists of names:
+ bt_split_list
+ bt_split_name
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/05/05, Greg Ward (as string_util.c)
+@MODIFIED : 1997/05/14-05/16, GW: added all the code to split individual
+ names, renamed file to names.c
+@VERSION : $Id: names.c,v 1.23 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "btparse.h"
+#include "prototypes.h"
+#include "error.h"
+#include "my_alloca.h"
+/*#include "my_dmalloc.h"*/
+#include "bt_debug.h"
+
+
+#define MAX_COMMAS 2
+
+#define update_depth(s,offs,depth) \
+switch (s[offs]) \
+{ \
+ case '{': depth++; break; \
+ case '}': depth--; break; \
+}
+
+/*
+ * `name_loc' specifies where a name is found -- used for generating
+ * useful warning messages. `line' and `name_num' are both 1-based.
+ */
+typedef struct
+{
+ char * filename; /* source file name; bt_split_name() copies its `filename' argument here */
+ int line; /* 1-based line number; copied from bt_split_name()'s `line' */
+ int name_num; /* 1-based number of the name; copied from bt_split_name()'s `name_num' */
+} name_loc;
+
+
+/*
+ * Instantiates name_warning (loc, fmt, ...) through GEN_PRIVATE_ERRFUNC, a
+ * macro defined outside this file (presumably in error.h -- confirm).  The
+ * macro is handed the error class BTERR_CONTENT, the filename and line
+ * taken from `loc', the item description "name" with loc->name_num, and
+ * the printf-style format argument.
+ */
+GEN_PRIVATE_ERRFUNC (name_warning,
+ (name_loc * loc, const char * fmt, ...),
+ BTERR_CONTENT, loc->filename, loc->line,
+ "name", loc->name_num, fmt)
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_split_list()
+@INPUT : string - string to split up; whitespace must be collapsed
+ eg. by bt_postprocess_string()
+ delim - delimiter to use; must be lowercase and should be
+ free of whitespace (code requires that delimiters
+ in string be surrounded by whitespace)
+ filename - source of string (for warning messages)
+ line - 1-based line number into file (for warning messages)
+ description - what substrings are (eg. "name") (for warning
+ messages); if NULL will use "substring"
+@OUTPUT :
+@RETURNS : newly-allocated bt_stringlist describing the substrings, or
+ NULL if string is NULL or empty
+@DESCRIPTION: Splits a string using a fixed delimiter, in the BibTeX way:
+ * delimiters at beginning or end of string are ignored
+ * delimiters in string must be surrounded by whitespace
+ * case insensitive
+ * delimiters at non-zero brace depth are ignored
+
+ The substrings are returned in a bt_stringlist whose `items'
+ field is an array of `num_items' pointers into a duplicate of
+ string (kept in the `string' field). This duplicate copy has
+ been scribbled on such that there is a nul byte at the end of
+ every substring; an empty substring is stored as a NULL item.
+ You should call bt_free_list() to free the duplicate copy of
+ string, the items array, and the structure itself. Do *not*
+ walk over the array free()'ing the substrings yourself, as
+ this is invalid -- they were not malloc()'d!
+@GLOBALS :
+@CALLS :
+@CALLERS : anyone (exported by library)
+@CREATED : 1997/05/05, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+bt_stringlist *
+bt_split_list (char *   string,
+               char *   delim,
+               char *   filename,
+               int      line,
+               char *   description)
+{
+   /*
+    * Splits `string' at every occurrence of `delim' that is surrounded by
+    * single spaces and sits at brace depth zero (case-insensitively;
+    * `delim' itself must be lowercase).  Returns a newly-allocated
+    * bt_stringlist, or NULL if `string' is NULL or empty or if memory
+    * runs out.  A NULL or empty `delim' never matches, so the whole
+    * string comes back as a single item.  `filename', `line' and
+    * `description' are only used for the "empty <description>" message.
+    */
+   int    depth;                  /* brace depth */
+   int    i, j;                   /* offset into string and delim */
+   int    inword;                 /* true if prev. char was not a space */
+   int    string_len;
+   int    delim_len;
+   int    maxdiv;                 /* upper limit on no. of divisions */
+   int    maxoffs;                /* max offset of delim in string */
+   int    numdiv;                 /* number of divisions */
+   int *  start;                  /* start of each division */
+   int *  stop;                   /* stop of each division */
+   bt_stringlist *
+          list;                   /* structure to return */
+
+   if (string == NULL)
+      return NULL;
+   if (description == NULL)
+      description = "substring";
+
+   /*
+    * This is a bit of a band-aid solution to the "split empty string"
+    * bug (formerly hit the internal_error() at the end of the function).
+    * Still need a general "detect and fix unpreprocessed string" --
+    * admittedly a different bug/misfeature.
+    */
+   string_len = strlen (string);
+   if (string_len == 0)
+      return NULL;
+
+   /* guard the division below: an empty delimiter can never match */
+   delim_len = (delim != NULL) ? (int) strlen (delim) : 0;
+   if (delim_len > 0)
+   {
+      maxdiv = (string_len / delim_len) + 1;
+      maxoffs = string_len - delim_len + 1;
+   }
+   else
+   {
+      maxdiv = 1;
+      maxoffs = 0;                /* scanning loop is skipped entirely */
+   }
+
+   /*
+    * The work arrays grow with the input, so they live on the heap
+    * rather than on the stack.
+    */
+   start = (int *) malloc ((size_t) maxdiv * sizeof (int));
+   stop = (int *) malloc ((size_t) maxdiv * sizeof (int));
+   list = (bt_stringlist *) malloc (sizeof (bt_stringlist));
+   if (start == NULL || stop == NULL || list == NULL)
+   {
+      free (start);
+      free (stop);
+      free (list);
+      return NULL;
+   }
+
+   depth = 0;
+   i = j = 0;
+   inword = 1;                    /* so leading delim ignored */
+   numdiv = 0;
+   start[0] = 0;                  /* first substring @ start of string */
+
+   while (i < maxoffs)
+   {
+      /* does current char. in string match current char. in delim? */
+      if (depth == 0 && !inword &&
+          tolower ((unsigned char) string[i]) == delim[j])
+      {
+         j++; i++;
+
+         /* have we found an entire delim, followed by a space? */
+         if (j == delim_len && string[i] == ' ')
+         {
+            stop[numdiv] = i - delim_len - 1;
+            start[++numdiv] = ++i;
+            j = 0;
+
+#if DEBUG
+            printf ("found complete delim; i == %d, numdiv == %d: "
+                    "stop[%d] == %d, start[%d] == %d\n",
+                    i, numdiv,
+                    numdiv-1, stop[numdiv-1],
+                    numdiv, start[numdiv]);
+#endif
+         }
+      }
+
+      /* no match between string and delim, at non-zero depth, or in a word */
+      else
+      {
+         update_depth (string, i, depth);
+         inword = (i < string_len) && (string[i] != ' ');
+         i++;
+         j = 0;
+      }
+   }
+
+   stop[numdiv] = string_len;     /* last substring ends just past eos */
+   list->num_items = numdiv+1;
+
+   /*
+    * OK, now we know how many divisions there are and where they are --
+    * so let's split that string up for real!
+    *
+    * list->items will be an array of pointers into a duplicate of
+    * `string'; we duplicate `string' so we can safely scribble on it and
+    * free() it later (in bt_free_list()).
+    */
+
+   list->items = (char **) malloc (list->num_items * sizeof (char *));
+   list->string = strdup (string);
+   if (list->items == NULL || list->string == NULL)
+   {
+      free (list->items);
+      free (list->string);
+      free (list);
+      free (start);
+      free (stop);
+      return NULL;
+   }
+
+   for (i = 0; i < list->num_items; i++)
+   {
+      /*
+       * Possible cases:
+       *   - stop < start is for empty elements, e.g. "and and" seen in
+       *     input.  (`start' for empty element will be the 'a' of the
+       *     second 'and', and its stop will be the ' ' *before* the
+       *     second 'and'.)
+       *   - stop > start is for anything else between two and's (the usual)
+       *   - stop == start should never happen if the loop above is correct
+       */
+
+      if (stop[i] > start[i])           /* the usual case */
+      {
+         list->string[stop[i]] = 0;
+         list->items[i] = list->string+start[i];
+      }
+      else if (stop[i] < start[i])      /* empty element */
+      {
+         list->items[i] = NULL;
+         general_error (BTERR_CONTENT, filename, line,
+                        description, i+1, "empty %s", description);
+      }
+      else                              /* should not happen! */
+      {
+         list->items[i] = NULL;         /* keep the slot well-defined */
+         internal_error ("stop == start for substring %d", i);
+      }
+   }
+
+   free (start);
+   free (stop);
+   return list;
+
+} /* bt_split_list () */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_free_list()
+@INPUT : list
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Frees the list of strings created by bt_split_list().
+@GLOBALS :
+@CALLS :
+@CALLERS : anyone (exported by library)
+@CREATED : 1997/05/06, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void bt_free_list (bt_stringlist *list)
+{
+   /* nothing to release for a NULL list */
+   if (list == NULL)
+      return;
+
+   /*
+    * free() accepts NULL, so the members need no individual checks:
+    * release the scribbled-on string copy, the pointer array, and
+    * finally the structure itself.
+    */
+   free (list->string);
+   free (list->items);
+   free (list);
+}
+
+
+
+/* ----------------------------------------------------------------------
+ * Stuff for splitting up a single name
+ */
+
+
+/* ------------------------------------------------------------------------
+@NAME : find_commas
+@INPUT : name - string to search for commas
+ max_commas - maximum number of commas to allow (if more than
+ this number are seen, a warning is printed and
+ the excess commas are removed)
+@OUTPUT :
+@RETURNS : number of commas found
+@DESCRIPTION: Counts and records positions of commas at brace-depth 0.
+ Modifies string in-place to remove whitespace around commas,
+ excess commas, and any trailing commas; warns on excess or
+ trailing commas. Excess commas are removed by replacing them
+ with space and calling bt_postprocess_string() to collapse
+ whitespace a second time; trailing commas are simply replaced
+ with (char) 0 to truncate the string.
+
+ Assumes whitespace has been collapsed (ie. no space at
+ beginning or end of string, and all internal strings of
+ whitespace reduced to exactly one space).
+@GLOBALS :
+@CALLS : name_warning() (if too many commas, or commas at end)
+@CALLERS : bt_split_name()
+@CREATED : 1997/05/14, Greg Ward
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static int
+find_commas (name_loc * loc, char *name, int max_commas)
+{
+   /*
+    * Counts the commas in `name' that sit at brace depth zero, editing
+    * the string in place: commas beyond `max_commas' are blanked out,
+    * whitespace around the remaining commas is removed, and trailing
+    * commas are cut off.  Returns the number of commas left.  `loc' is
+    * only used for the two warnings.
+    */
+   int      i, j;                 /* read and write offsets into name */
+   int      depth;                /* brace depth */
+   int      num_commas;
+   int      len;
+   boolean  at_comma;
+   boolean  warned;
+
+   depth = 0;
+   num_commas = 0;
+   len = strlen (name);
+   warned = 0;
+
+   /*
+    * First pass to check for and blank out excess commas.  Brace depth
+    * is tracked here as well, so that commas inside braces are neither
+    * counted nor blanked -- the same rule the second pass applies.
+    */
+
+   for (i = 0; i < len; i++)
+   {
+      if (depth == 0 && name[i] == ',')
+      {
+         num_commas++;
+         if (num_commas > max_commas)
+         {
+            if (! warned)
+            {
+               name_warning (loc, "too many commas in name (removing extras)");
+               warned = TRUE;
+            }
+            name[i] = ' ';
+         }
+      }
+      update_depth (name, i, depth);
+   }
+
+   /*
+    * If we blanked out a comma, better re-collapse whitespace.  (This is
+    * a bit of a cop-out -- I could probably adjust i and j appropriately
+    * in the above loop to do the collapsing for me, but my brain
+    * hurt when I tried to think it through.  Some other time, perhaps.)
+    * Collapsing can shorten the string, so its length is measured again.
+    */
+
+   if (warned)
+   {
+      bt_postprocess_string (name, BTO_COLLAPSE);
+      len = strlen (name);
+   }
+
+   /* Now the real comma-finding loop (only if necessary) */
+
+   if (num_commas == 0)
+      return 0;
+
+   num_commas = 0;
+   depth = 0;                     /* first pass may have left it non-zero */
+   i = j = 0;
+   while (i < len)
+   {
+      at_comma = (depth == 0 && name[i] == ',');
+      if (at_comma)
+      {
+         /* back up over whitespace already copied before the comma */
+         while (j > 0 && name[j-1] == ' ') j--;
+         num_commas++;
+      }
+
+      update_depth (name, i, depth);
+      if (i != j)
+         name[j] = name[i];
+
+      i++; j++;
+      if (at_comma)
+      {
+         /* skip whitespace following the comma */
+         while (i < len && name[i] == ' ') i++;
+      }
+   } /* while i */
+
+   if (i != j) name[j] = (char) 0;
+   j--;
+
+   /* strip trailing commas, never stepping in front of the string */
+   if (j >= 0 && name[j] == ',')
+   {
+      name_warning (loc, "comma(s) at end of name (removing)");
+      while (j >= 0 && name[j] == ',')
+      {
+         name[j--] = (char) 0;
+         num_commas--;
+      }
+   }
+
+   return num_commas;
+
+} /* find_commas() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : find_tokens
+@INPUT : name - string to tokenize (should be a private copy
+ that we're free to clobber and mangle)
+@OUTPUT : comma_token- number of token immediately preceding each comma
+ (caller must allocate with at least one element
+ per comma in `name')
+@RETURNS : newly-allocated bt_stringlist structure
+@DESCRIPTION: Finds tokens in a string; delimiter is space or comma at
+ brace-depth zero. Assumes whitespace has been collapsed
+ and find_commas has been run on the string to remove
+ whitespace around commas and any trailing commas.
+
+ The bt_stringlist structure returned can (and should) be
+ freed with bt_free_list().
+@GLOBALS :
+@CALLS :
+@CALLERS : bt_split_name()
+@CREATED : 1997/05/14, Greg Ward
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static bt_stringlist *
+find_tokens (char *  name,
+             int *   comma_token)
+{
+   /*
+    * Tokenizes `name' in place: every space or comma at brace depth zero
+    * is overwritten with a nul byte, and the start of each token is
+    * recorded in the returned list's `items' array.  For every comma,
+    * the number of the token preceding it is stored in `comma_token'.
+    * The list's `string' field is `name' itself (no copy is made).
+    * Returns NULL only if memory cannot be allocated.
+    */
+   int   i;                       /* index into name */
+   int   num_tok;
+   int   in_boundary;             /* previous char was ' ' or ',' */
+   int   cur_comma;               /* index into comma_token */
+   int   len;
+   int   depth;
+   bt_stringlist *
+         tokens;
+
+   i = 0;
+   in_boundary = 1;               /* so first char will start a token */
+   cur_comma = 0;
+   len = strlen (name);
+   depth = 0;
+   num_tok = 0;
+
+   tokens = (bt_stringlist *) malloc (sizeof (bt_stringlist));
+   if (tokens == NULL)
+      return NULL;
+   /* tokens->string = name ? strdup (name) : NULL; */
+   tokens->string = name;
+   tokens->items = NULL;
+   tokens->num_items = 0;         /* defined even for the empty list */
+
+   if (len == 0)                  /* empty string? */
+      return tokens;              /* return empty token list */
+
+   /* a string of len characters cannot hold more than len tokens */
+   tokens->items = (char **) malloc (sizeof (char *) * len);
+   if (tokens->items == NULL)
+   {
+      free (tokens);
+      return NULL;
+   }
+
+   while (i < len)
+   {
+      if (depth == 0 && in_boundary)    /* at start of a new token */
+      {
+         tokens->items[num_tok++] = name+i;
+      }
+
+      if (depth == 0 && (name[i] == ' ' || name[i] == ','))
+      {
+         /* if we're at a comma, record the token preceding the comma */
+
+         if (name[i] == ',')
+         {
+            comma_token[cur_comma++] = num_tok-1;
+         }
+
+         /*
+          * if already in a boundary zone, we have an empty token
+          * (caused by multiple consecutive commas); it stays in the
+          * list as a NULL item.  The token count is deliberately not
+          * decremented here: doing so at every delimiter would let the
+          * next token overwrite the one just completed.
+          */
+         if (in_boundary)
+         {
+            tokens->items[num_tok-1] = NULL;
+         }
+
+         /* in any case, mark the end of one token and prepare for the
+          * start of the next
+          */
+         name[i] = (char) 0;
+         in_boundary = 1;
+      }
+      else
+      {
+         in_boundary = 0;               /* inside a token */
+      }
+
+      update_depth (name, i, depth);
+      i++;
+
+   } /* while i */
+
+   tokens->num_items = num_tok;
+   return tokens;
+
+} /* find_tokens() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : find_lc_tokens()
+@INPUT : tokens
+@OUTPUT : first_lc
+ last_lc
+@RETURNS :
+@DESCRIPTION: Finds the first contiguous sequence of lowercase tokens in
+ the token list `tokens' (as produced by find_tokens()). On
+ return, *first_lc and *last_lc hold the token numbers of the
+ first and last token of that sequence, or -1 if there is no
+ lowercase token at all.
+@GLOBALS :
+@CALLS :
+@CALLERS : bt_split_name()
+@CREATED : 1997/05/14, Greg Ward
+@MODIFIED :
+-------------------------------------------------------------------------- */
+/*
+ * True if `token' is a non-empty token whose first character is a
+ * lowercase letter.  Empty tokens are stored as NULL in the token list
+ * and never count as lowercase.
+ */
+static int
+token_is_lowercase (const char * token)
+{
+   return token != NULL && islower ((unsigned char) token[0]);
+}
+
+static void
+find_lc_tokens (bt_stringlist * tokens,
+                int *           first_lc,
+                int *           last_lc)
+{
+   /*
+    * Sets *first_lc and *last_lc to the first and last token numbers of
+    * the earliest contiguous run of lowercase tokens in `tokens', or
+    * leaves both at -1 if there is no lowercase token.
+    */
+   int   i;                       /* iterate over token list this time */
+
+   *first_lc = *last_lc = -1;     /* haven't found either yet */
+
+   /* skip everything in front of the first lowercase token */
+   i = 0;
+   while (i < tokens->num_items && !token_is_lowercase (tokens->items[i]))
+      i++;
+
+   if (i == tokens->num_items)    /* no lowercase token at all */
+      return;
+
+   /* extend the run as far as it goes; later runs are ignored */
+   *first_lc = i;
+   while (i < tokens->num_items && token_is_lowercase (tokens->items[i]))
+      i++;
+   *last_lc = i-1;
+
+} /* find_lc_tokens() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : resolve_token_range()
+@INPUT : tokens - structure containing the token list
+ tok_range - two-element array with start and stop token number
+@OUTPUT : *part - set to point to first token in range, or NULL
+ if empty range
+ *num_tok - number of tokens in the range
+@RETURNS :
+@DESCRIPTION: Given a list of tokens and a range of token numbers (as a
+ two-element array, tok_range), computes the number of tokens
+ in the range. If this is > 0, sets *part to point
+ to the first token in the range; otherwise, sets *part
+ to NULL and *num_tok to 0.
+@CALLERS :
+@CREATED : May 1997, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+resolve_token_range (bt_stringlist *tokens,
+                     int *          tok_range,
+                     char ***       part,
+                     int *          num_tok)
+{
+   /* inclusive range, so a start one past the stop means "empty" */
+   int   count = (tok_range[1] - tok_range[0]) + 1;
+
+   if (count > 0)
+   {
+      /* non-empty: point at the first token of the range */
+      *num_tok = count;
+      *part = tokens->items + tok_range[0];
+      return;
+   }
+
+   /* empty (or inverted) range */
+   *num_tok = 0;
+   *part = NULL;
+} /* resolve_token_range() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : split_simple_name()
+@INPUT : name
+ first_lc
+ last_lc
+@OUTPUT : name
+@RETURNS :
+@DESCRIPTION: Splits up a name (represented as a string divided into
+ non-overlapping, whitespace-separated tokens) according
+ to the BibTeX rules for names without commas. Specifically:
+ * tokens up to (but not including) the first lowercase
+ token, or the last token of the string if there
+ are no lowercase tokens, become the `first' part
+ * the earliest contiguous sequence of lowercase tokens,
+ up to (but not including) the last token of the string,
+ becomes the `von' part
+ * the tokens following the `von' part, or the last
+ single token if there is no `von' part, become
+ the `last' part
+ * there is no `jr' part
+@GLOBALS :
+@CALLS : name_warning() (if last lc token taken as lastname)
+ resolve_token_range()
+@CALLERS : bt_split_name()
+@CREATED : 1997/05/15, Greg Ward
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+split_simple_name (name_loc * loc,
+ bt_name * name,
+ int first_lc,
+ int last_lc)
+{
+ /*
+ * Each *_t array is an inclusive [start, stop] range of token numbers;
+ * a stop one below the start denotes an empty part, which
+ * resolve_token_range() turns into a NULL part of length 0.
+ */
+ int first_t[2], von_t[2], last_t[2];
+ int end;
+
+ end = name->tokens->num_items-1; /* token number of last token */
+
+ if (first_lc > -1) /* any lowercase tokens at all? */
+ {
+ first_t[0] = 0; /* first name goes from beginning */
+ first_t[1] = first_lc-1; /* to just before first lc token */
+
+ if (last_lc == end) /* sequence of lowercase tokens */
+ { /* goes all the way to end of string */
+ last_lc--; /* -- roll it back by one so we */
+ /* still have a lastname */
+#ifdef WARN_LC_LASTNAME
+ /*
+ * disable this warning for now because "others" is used fairly
+ * often as a name in BibTeX databases -- oops!
+ */
+ name_warning (loc,
+ "no capitalized token at end of name; "
+ "using \"%s\" as lastname",
+ name->tokens->items[end]);
+#else
+# ifndef ALLOW_WARNINGS
+ loc = NULL; /* avoid "unused parameter" warning */
+# endif
+#endif
+ }
+
+ von_t[0] = first_lc; /* `von' part covers sequence of */
+ von_t[1] = last_lc; /* lowercase tokens */
+ last_t[0] = last_lc+1; /* lastname from after `von' to end */
+ last_t[1] = end; /* of string */
+ }
+ else /* no lowercase tokens */
+ {
+ von_t[0] = 0; /* empty `von' part */
+ von_t[1] = -1;
+ first_t[0] = 0; /* `first' goes from first to second */
+ first_t[1] = end-1; /* last token */
+ last_t[0] = last_t[1] = end; /* and `last' is just the last token */
+ }
+
+ /* turn the token ranges into part pointers and lengths inside `name' */
+ resolve_token_range (name->tokens, first_t,
+ name->parts+BTN_FIRST, name->part_len+BTN_FIRST);
+ resolve_token_range (name->tokens, von_t,
+ name->parts+BTN_VON, name->part_len+BTN_VON);
+ resolve_token_range (name->tokens, last_t,
+ name->parts+BTN_LAST, name->part_len+BTN_LAST);
+ name->parts[BTN_JR] = NULL; /* no jr part possible */
+ name->part_len[BTN_JR] = 0;
+
+} /* split_simple_name() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : split_general_name()
+@INPUT : name
+ num_commas
+ comma_token
+ first_lc
+ last_lc
+@OUTPUT : name
+@RETURNS :
+@DESCRIPTION: Splits a name according to the BibTeX rules for names
+ with 1 or 2 commas (> 2 commas is handled elsewhere,
+ namely by bt_split_name() calling find_commas() with
+ max_commas == 2). Specifically:
+ * an initial string of lowercase tokens, up to (but not
+ including) the token before the first comma, becomes
+ the `von' part
+ * tokens from immediately after the `von' part,
+ or from the beginning of the string if no `von',
+ up to the first comma become the `last' part
+
+ if one comma:
+ * all tokens following the sole comma become the
+ `first' part
+
+ if two commas:
+ * tokens between the two commas become the `jr' part
+ * all tokens following the second comma become the
+ `first' part
+@GLOBALS :
+@CALLS : name_warning() (if last lc token taken as lastname)
+ resolve_token_range()
+@CALLERS : bt_split_name()
+@CREATED : 1997/05/15, Greg Ward
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+split_general_name (name_loc * loc,
+                    bt_name *  name,
+                    int        num_commas,
+                    int *      comma_token,
+                    int        first_lc,
+                    int        last_lc)
+{
+   /*
+    * Splits a name containing one or two commas.  Each *_t array is an
+    * inclusive [start, stop] range of token numbers; a stop one below
+    * the start denotes an empty part.  comma_token[n] is the number of
+    * the token immediately preceding comma n.
+    */
+   int   first_t[2], von_t[2], last_t[2], jr_t[2];
+   int   end;
+
+   end = name->tokens->num_items-1;     /* last token number */
+
+   if (first_lc == 0)                   /* we have an initial string of */
+   {                                    /* lowercase tokens */
+      /*
+       * The lowercase run reaches (or runs past) the first comma, so no
+       * capitalized token is left for the `last' part.  Cut the run off
+       * just before the token preceding the comma; that token becomes
+       * the last name, and `von' can never spill over into the parts
+       * that follow the comma.
+       */
+      if (last_lc >= comma_token[0])
+      {
+         name_warning (loc, "no capitalized tokens before first comma");
+         last_lc = comma_token[0] - 1;
+      }
+
+      von_t[0] = first_lc;              /* `von' covers the sequence of */
+      von_t[1] = last_lc;               /* lowercase tokens */
+   }
+   else                                 /* no lowercase tokens at start */
+   {
+      von_t[0] = 0;                     /* empty `von' part */
+      von_t[1] = -1;
+   }
+
+   last_t[0] = von_t[1] + 1;            /* start right after end of `von' */
+   last_t[1] = comma_token[0];          /* and end at first comma */
+
+   if (num_commas == 1)
+   {
+      first_t[0] = comma_token[0]+1;    /* start right after comma */
+      first_t[1] = end;                 /* stop at end of string */
+      jr_t[0] = 0;                      /* empty `jr' part */
+      jr_t[1] = -1;
+   }
+   else                                 /* more than 1 comma */
+   {
+      jr_t[0] = comma_token[0]+1;       /* start after first comma */
+      jr_t[1] = comma_token[1];         /* stop at second comma */
+      first_t[0] = comma_token[1]+1;    /* start after second comma */
+      first_t[1] = end;                 /* and go to end */
+   }
+
+   /* turn the token ranges into part pointers and lengths inside `name' */
+   resolve_token_range (name->tokens, first_t,
+                        name->parts+BTN_FIRST, name->part_len+BTN_FIRST);
+   resolve_token_range (name->tokens, von_t,
+                        name->parts+BTN_VON, name->part_len+BTN_VON);
+   resolve_token_range (name->tokens, last_t,
+                        name->parts+BTN_LAST, name->part_len+BTN_LAST);
+   resolve_token_range (name->tokens, jr_t,
+                        name->parts+BTN_JR, name->part_len+BTN_JR);
+
+} /* split_general_name() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_split_name()
+@INPUT : name
+ filename
+ line
+ name_num
+@OUTPUT :
+@RETURNS : newly-allocated bt_name structure containing the four
+ parts as token-lists
+@DESCRIPTION: Splits a name according to the BibTeX rules. There are
+ actually two sets of rules: one for names with no commas,
+ and one for names with 1 or 2 commas. (If a name has
+ more than 2 commas, the extras are removed and it's treated
+ as though it had just the first 2.)
+
+ See split_simple_name() for the no-comma rules, and
+ split_general_name() for the 1-or-2-commas rules.
+
+ The bt_name structure returned can (and should) be freed
+ with bt_free_name() when you no longer need it.
+@GLOBALS :
+@CALLS :
+@CALLERS : anyone (exported by library)
+@CREATED : 1997/05/14, Greg Ward
+@MODIFIED :
+@COMMENTS : The name-splitting code all implicitly assumes that the
+ string being split has been post-processed to collapse
+ whitespace in the BibTeX way. This means that it tends to
+ dump core on such things as leading whitespace, or more than
+ one space in a row inside the string. This could probably be
+ alleviated with a call to bt_postprocess_string(), possibly
+ preceded by a check for any of those occurrences. Before
+ doing that, though, I want to examine the code carefully to
+ determine just what assumptions it makes -- so I can
+ check/correct for all of them.
+-------------------------------------------------------------------------- */
+bt_name *
+bt_split_name (char *  name,
+               char *  filename,
+               int     line,
+               int     name_num)
+{
+   /*
+    * Splits `name' into its first/von/last/jr parts.  Works on a private
+    * copy of the string, which ends up owned by the returned structure's
+    * token list (and is released by bt_free_name()).  A NULL or empty
+    * name -- or one that is empty once commas have been cleaned up, or
+    * that starts with a delimiter -- yields a structure with no tokens
+    * and four empty parts.  `filename', `line' and `name_num' are only
+    * used to locate warnings.  Returns NULL if memory runs out.
+    */
+   name_loc    loc;
+   bt_stringlist *
+               tokens;
+   int         comma_token[MAX_COMMAS];
+   int         num_commas;
+   int         first_lc, last_lc;
+   bt_name *   split_name;
+   int         i;
+
+   DBG_ACTION (1, printf ("bt_split_name(): name=%p (%s)\n",
+                          name, name ? name : "(null)"))
+
+   split_name = (bt_name *) malloc (sizeof (bt_name));
+   if (split_name == NULL)
+      return NULL;
+
+   DBG_ACTION (1, printf ("bt_split_name(): split_name=%p\n", split_name))
+
+   /* start out as the empty name; every early return below relies on it */
+   split_name->tokens = NULL;
+   for (i = 0; i < BT_MAX_NAMEPARTS; i++)
+   {
+      split_name->parts[i] = NULL;
+      split_name->part_len[i] = 0;
+   }
+
+   if (name == NULL || name[0] == (char) 0)   /* non-existent or empty? */
+      return split_name;                      /* (no copy made, no leak) */
+
+   name = strdup (name);          /* private copy that we may clobber */
+   if (name == NULL)
+   {
+      free (split_name);
+      return NULL;
+   }
+
+   loc.filename = filename;       /* so called functions can generate */
+   loc.line = line;               /* decent warning messages */
+   loc.name_num = name_num;
+
+   num_commas = find_commas (&loc, name, MAX_COMMAS);
+   assert (num_commas <= MAX_COMMAS);
+
+   DBG_ACTION (1, printf ("found %d commas: ", num_commas))
+
+   /* find_commas() may have cut the string down to nothing (e.g. ",") */
+   if (name[0] == (char) 0)
+   {
+      free (name);
+      return split_name;
+   }
+
+   tokens = find_tokens (name, comma_token);
+   if (tokens == NULL)
+   {
+      free (name);
+      free (split_name);
+      return NULL;
+   }
+
+#if DEBUG
+   printf ("found %d tokens:\n", tokens->num_items);
+   for (i = 0; i < tokens->num_items; i++)
+   {
+      printf ("  %d: ", i);
+
+      if (tokens->items[i])       /* non-empty token? */
+      {
+         printf (">%s<\n", tokens->items[i]);
+      }
+      else
+      {
+         printf ("(empty)\n");
+      }
+   }
+#endif
+
+#if DEBUG
+   printf ("comma tokens: ");
+   for (i = 0; i < num_commas; i++)
+      printf ("%d ", comma_token[i]);
+   printf ("\n");
+#endif
+
+   /*
+    * Tokenizing overwrites delimiters with nul bytes, so an empty string
+    * at this point means the name started with a delimiter.  It is
+    * treated as an empty name; the token list (which owns our private
+    * copy of the string) is released instead of being leaked.
+    */
+   if (name[0] == (char) 0)       /* name now empty? */
+   {
+      bt_free_list (tokens);
+      return split_name;
+   }
+
+   find_lc_tokens (tokens, &first_lc, &last_lc);
+#if DEBUG
+   printf ("(first,last) lc tokens = (%d,%d)\n", first_lc, last_lc);
+#endif
+
+   split_name->tokens = tokens;
+   if (num_commas == 0)           /* no commas -- "simple" format */
+   {
+      split_simple_name (&loc, split_name,
+                         first_lc, last_lc);
+   }
+   else
+   {
+      split_general_name (&loc, split_name,
+                          num_commas, comma_token,
+                          first_lc, last_lc);
+   }
+
+#if DEBUG
+   printf ("bt_split_name(): returning structure %p\n", split_name);
+#endif
+   return split_name;
+} /* bt_split_name() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_free_name()
+@INPUT : name
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Frees up any memory allocated for a bt_name structure
+ (namely, the `tokens' field [a bt_stringlist structure,
+ this freed with bt_free_list()] and the structure itself.)
+@CALLS : bt_free_list()
+@CALLERS : anyone (exported)
+@CREATED : 1997/11/14, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_free_name (bt_name * name)
+{
+   /*
+    * Releases the token list (via bt_free_list()) and the structure
+    * itself.  A NULL `name' is ignored.  Names split from an empty
+    * string carry no token list and no parts, so the debug output must
+    * not assume that either is present.
+    */
+   if (name == NULL)
+      return;
+
+   DBG_ACTION (2, printf ("bt_free_name(): freeing name %p "
+                          "(%d tokens, string=%p (%s), last[0]=%s)\n",
+                          name,
+                          name->tokens ? name->tokens->num_items : 0,
+                          name->tokens ? name->tokens->string : NULL,
+                          (name->tokens && name->tokens->string)
+                             ? name->tokens->string : "(null)",
+                          (name->parts[BTN_LAST] && name->parts[BTN_LAST][0])
+                             ? name->parts[BTN_LAST][0] : "(null)"));
+   bt_free_list (name->tokens);
+   free (name);
+   DBG_ACTION (2, printf ("bt_free_name(): done, everything freed\n"));
+}
diff --git a/src/translators/btparse/parse_auxiliary.c b/src/translators/btparse/parse_auxiliary.c
new file mode 100644
index 0000000..f509741
--- /dev/null
+++ b/src/translators/btparse/parse_auxiliary.c
@@ -0,0 +1,336 @@
+/* ------------------------------------------------------------------------
+@NAME : parse_auxiliary.c
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Anything needed by the parser that's too hairy to go in the
+ grammar itself. Currently, just stuff needed for generating
+ syntax errors. (See error.c for how they're actually
+ printed.)
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1996/08/07, Greg Ward
+@MODIFIED :
+@VERSION : $Id: parse_auxiliary.c,v 1.20 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include "stdpccts.h"
+#include "error.h"
+#include "lex_auxiliary.h"
+#include "parse_auxiliary.h"
+/*#include "my_dmalloc.h"*/
+
+/* Defined in input.c; used below as the filename reported with syntax errors. */
+extern char * InputFilename; /* from input.c */
+
+/*
+ * Instantiates syntax_error (fmt, ...) through GEN_PRIVATE_ERRFUNC, a macro
+ * defined outside this file (presumably in error.h -- confirm).  The macro
+ * is handed the error class BTERR_SYNTAX, InputFilename and zzline as the
+ * location, no item description (NULL, -1), and the printf-style format.
+ */
+GEN_PRIVATE_ERRFUNC (syntax_error, (char * fmt, ...),
+ BTERR_SYNTAX, InputFilename, zzline, NULL, -1, fmt)
+
+
+/* this is stolen from PCCTS' err.h */
+/*
+ * One single-bit mask for each of eight bit positions, lowest bit first.
+ * append_token_set() ANDs each word of a token set with these masks to
+ * find out which bits are set.
+ */
+static SetWordType bitmask[] =
+{
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+ 0x00000010, 0x00000020, 0x00000040, 0x00000080
+};
+
+/*
+ * Replacement display names for tokens: fix_token_names() stores each
+ * `new_name' into zztokens[] at index `token', and zzsyn() then uses
+ * zztokens[] when building syntax-error messages.
+ */
+static struct
+{
+ int token; /* token number, used as index into zztokens[] */
+ const char *new_name; /* text to install for that token */
+} new_tokens[] =
+{
+ { AT, "\"@\"" },
+ { NAME, "name (entry type, key, field, or macro name)" },
+ { LBRACE, "left brace (\"{\")" },
+ { RBRACE, "right brace (\"}\")" },
+ { ENTRY_OPEN, "start of entry (\"{\" or \"(\")" },
+ { ENTRY_CLOSE,"end of entry (\"}\" or \")\")" },
+ { EQUALS, "\"=\"" },
+ { HASH, "\"#\"" },
+ { COMMA, "\",\"" },
+ { NUMBER, "number" },
+ { STRING, "quoted string ({...} or \"...\")" }
+};
+
+
+/*
+ * Only compiled when CLEVER_TOKEN_STUFF is defined: fix_token_names() then
+ * allocates this array and fills it with the pointers found in zztokens[].
+ */
+#ifdef CLEVER_TOKEN_STUFF
+char **token_names;
+#endif
+
+
+void
+fix_token_names (void)
+{
+   /*
+    * Installs the friendlier token names from new_tokens[] into
+    * zztokens[], so that error messages built from zztokens[] read
+    * better.
+    */
+   int   idx;
+   int   replacements;
+
+#ifdef CLEVER_TOKEN_STUFF               /* clever, but it doesn't work... */
+   /* arg! this doesn't work because I don't know how to find out the
+    * number of tokens
+    */
+
+   int   num_tok;
+
+   num_tok = (sizeof(zztokens) / sizeof(*zztokens));
+   sizeof (zztokens);
+   sizeof (*zztokens);
+   token_names = (char **) malloc (sizeof (char *) * num_tok);
+
+   idx = 0;
+   while (idx < num_tok)
+   {
+      token_names[idx] = zztokens[idx];
+      idx++;
+   }
+#endif
+
+   /* overwrite one zztokens[] slot per entry of the replacement table */
+   replacements = (sizeof(new_tokens) / sizeof(*new_tokens));
+   idx = 0;
+   while (idx < replacements)
+   {
+      const char **slot = zztokens + new_tokens[idx].token;
+
+      *slot = new_tokens[idx].new_name;
+      idx++;
+   }
+}
+
+
+#ifdef USER_ZZSYN
+
+/*
+ * Appends to `msg' the names (from zztokens[]) of all tokens whose bits
+ * are set in the token set `a', separated by ", " and with " or " in
+ * front of the last one.  The set is scanned as zzSET_SIZE words.
+ *
+ * NOTE(review): everything is appended with strcat() and the function
+ * does not know the size of `msg' -- the caller must provide a buffer
+ * large enough for all the names.
+ */
+static void
+append_token_set (char *msg, SetWordType *a)
+{
+ SetWordType *p = a;
+ SetWordType *endp = &(p[zzSET_SIZE]);
+ unsigned e = 0; /* token number belonging to the bit being tested */
+ int tokens_printed = 0;
+
+ do
+ {
+ SetWordType t = *p;
+ SetWordType *b = &(bitmask[0]);
+ do
+ {
+ if (t & *b)
+ {
+ strcat (msg, zztokens[e]);
+ tokens_printed++;
+ /* ", " between names, " or " before the last one, nothing after it */
+ if (tokens_printed < zzset_deg (a) - 1)
+ strcat (msg, ", ");
+ else if (tokens_printed == zzset_deg (a) - 1)
+ strcat (msg, " or ");
+ }
+ e++;
+ } while (++b < &(bitmask[sizeof(SetWordType)*8]));
+ } while (++p < endp);
+}
+
+
+void
+zzsyn(const char *  text,
+      int           tok,
+      char *        egroup,
+      SetWordType * eset,
+      int           etok,
+      int           k,
+      const char *  bad_text)
+{
+   /*
+    * Builds a syntax-error message in a static buffer and reports it
+    * with syntax_error().  The message says what was found (`bad_text',
+    * or end of input if `tok' is zzEOF_TOKEN), what was expected (the
+    * token set `eset' or the single token `etok'), and, if `egroup' is
+    * non-empty, where.
+    *
+    * The text written here is formatted with snprintf() so it cannot run
+    * past the end of the buffer, and the finished message is passed to
+    * syntax_error() as an argument of "%s" rather than as the format
+    * itself, because it contains text taken from the input.
+    * NOTE(review): append_token_set() still appends with strcat().
+    */
+   static char msg [MAX_ERROR];
+   size_t      len;
+
+#ifndef ALLOW_WARNINGS
+   text = NULL;                 /* avoid "unused parameter" warning */
+#endif
+
+   /* Initial message: give location of error */
+
+   if (tok == zzEOF_TOKEN)
+      snprintf (msg, sizeof msg, "at end of input");
+   else
+      snprintf (msg, sizeof msg, "found \"%s\"", bad_text);
+
+   len = strlen (msg);
+
+
+   /* Caller supplied neither a single token nor set of tokens expected... */
+
+   if (!etok && !eset)
+   {
+      syntax_error ("%s", msg);
+      return;
+   }
+
+   snprintf (msg+len, sizeof msg - len, ", ");
+   len = strlen (msg);
+
+
+   /* I'm not quite sure what this is all about, or where k would be != 1... */
+
+   if (k != 1)
+   {
+      snprintf (msg+len, sizeof msg - len, "; \"%s\" not%s", bad_text,
+                (zzset_deg (eset) > 1) ? " in" : "");
+      len = strlen (msg);
+   }
+
+
+   /* This is the code that usually gets run */
+
+   if (zzset_deg (eset) > 0)
+   {
+      snprintf (msg+len, sizeof msg - len, "%s",
+                (zzset_deg (eset) == 1) ? "expected "
+                                        : "expected one of: ");
+
+      append_token_set (msg, eset);
+   }
+   else
+   {
+      snprintf (msg+len, sizeof msg - len, "expected %s", zztokens[etok]);
+      if (etok == ENTRY_CLOSE)
+      {
+         len = strlen (msg);
+         snprintf (msg+len, sizeof msg - len, " (skipping to next \"@\")");
+         initialize_lexer_state ();
+      }
+   }
+
+   len = strlen (msg);
+   if (egroup && strlen (egroup) > 0)
+      snprintf (msg+len, sizeof msg - len, " in %s", egroup);
+
+   syntax_error ("%s", msg);
+
+}
+#endif /* USER_ZZSYN */
+
+
+void
+check_field_name (AST * field)
+{
+   /*
+    * Reports a syntax error if `field' is a field node whose name starts
+    * with a digit.  Anything that is not a field node, or has no text,
+    * is silently ignored.
+    */
+   char * name;
+
+   if (! field || field->nodetype != BTAST_FIELD)
+      return;
+
+   name = field->text;
+   if (name == NULL)
+      return;
+
+   /*
+    * Compare against the digit range directly: looking the character up
+    * with strchr() would also match the terminating nul of an empty
+    * name and report it as starting with a digit.
+    */
+   if (name[0] >= '0' && name[0] <= '9')
+      syntax_error ("invalid field name \"%s\": cannot start with digit",
+                    name);
+}
+
+
+#ifdef STACK_DUMP_CODE
+
+static void
+show_ast_stack_elem (int num)
+{
+   /* Prints one line describing slot `num' of the AST stack. */
+   extern const char *nodetype_names[]; /* nicked from bibtex_ast.c */
+   AST *node = zzastStack[num];
+
+   printf ("zzastStack[%3d] = ", num);
+
+   if (!node)                   /* empty slot */
+   {
+      printf ("NULL\n");
+      return;
+   }
+
+   if (node->nodetype > BTAST_MACRO)    /* not a known node type */
+   {
+      printf ("bogus node (uninitialized?)\n");
+      return;
+   }
+
+   printf ("{ %s: \"%s\" (line %d, char %d) }\n",
+           nodetype_names[node->nodetype],
+           node->text, node->line, node->offset);
+}
+
+
+static void
+show_ast_stack_top (char *label)
+{
+   /* Prints the top AST stack slot, prefixed by `label' when given. */
+   if (label == NULL)
+   {
+      printf ("ast stack top: ");
+   }
+   else
+   {
+      printf ("%s: ast stack top: ", label);
+   }
+
+   show_ast_stack_elem (zzast_sp);
+}
+
+
+static void
+dump_ast_stack (char *label)
+{
+   /*
+    * Prints every AST stack slot from zzast_sp up to (but excluding)
+    * ZZAST_STACKSIZE, prefixed by `label' when given.
+    */
+   int   slot = zzast_sp;
+
+   if (label == NULL)
+   {
+      printf ("complete ast stack:\n");
+   }
+   else
+   {
+      printf ("%s: complete ast stack:\n", label);
+   }
+
+   while (slot < ZZAST_STACKSIZE)
+   {
+      printf ("  ");
+      show_ast_stack_elem (slot);
+      slot++;
+   }
+}
+
+
+static void
+show_attrib_stack_elem (int num)
+{
+   /* Prints one line describing slot `num' of the attribute stack. */
+   Attrib   attr = zzaStack[num];
+
+   printf ("zzaStack[%3d] = ", num);
+   printf ("{ \"%s\" (token %d (%s), line %d, char %d) }\n",
+           attr.text,
+           attr.token,
+           zztokens[attr.token],
+           attr.line,
+           attr.offset);
+}
+
+
+static void
+show_attrib_stack_top (char *label)
+{
+   /* Prints the top attribute stack slot, prefixed by `label' when given. */
+   if (label == NULL)
+   {
+      printf ("attrib stack top: ");
+   }
+   else
+   {
+      printf ("%s: attrib stack top: ", label);
+   }
+
+   show_attrib_stack_elem (zzasp);
+}
+
+
+static void
+dump_attrib_stack (char *label)
+{
+   /*
+    * Prints every attribute stack slot from zzasp up to (but excluding)
+    * ZZA_STACKSIZE, prefixed by `label' when given.
+    */
+   int   slot = zzasp;
+
+   if (label == NULL)
+   {
+      printf ("complete attrib stack:\n");
+   }
+   else
+   {
+      printf ("%s: complete attrib stack:\n", label);
+   }
+
+   while (slot < ZZA_STACKSIZE)
+   {
+      printf ("  ");
+      show_attrib_stack_elem (slot);
+      slot++;
+   }
+}
+
+#endif /* STACK_DUMP_CODE */
diff --git a/src/translators/btparse/parse_auxiliary.h b/src/translators/btparse/parse_auxiliary.h
new file mode 100644
index 0000000..5500513
--- /dev/null
+++ b/src/translators/btparse/parse_auxiliary.h
@@ -0,0 +1,32 @@
+/* ------------------------------------------------------------------------
+@NAME : parse_auxiliary.h
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Prototype declarations for functions in parse_auxiliary.c
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/08, Greg Ward
+@MODIFIED :
+@VERSION : $Id: parse_auxiliary.h,v 1.5 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+#ifndef PARSE_AUXILIARY_H
+#define PARSE_AUXILIARY_H
+
+#include "stdpccts.h" /* for SetWordType typedef */
+
+void fix_token_names (void);
+void zzsyn (const char *text, int tok,
+ char *egroup, SetWordType *eset, int etok,
+ int k, const char *bad_text);
+void check_field_name (AST * field);
+
+#endif /* PARSE_AUXILIARY_H */
diff --git a/src/translators/btparse/postprocess.c b/src/translators/btparse/postprocess.c
new file mode 100644
index 0000000..7f7bfd4
--- /dev/null
+++ b/src/translators/btparse/postprocess.c
@@ -0,0 +1,498 @@
+/* ------------------------------------------------------------------------
+@NAME : postprocess.c
+@DESCRIPTION: Operations applied to the AST (or strings in it) after
+ parsing is complete.
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/12, Greg Ward (from code in bibparse.c, lex_auxiliary.c)
+@MODIFIED :
+@VERSION : $Id: postprocess.c,v 1.25 2000/05/02 23:06:31 greg Exp $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "btparse.h"
+#include "error.h"
+#include "parse_auxiliary.h"
+#include "prototypes.h"
+/*#include "my_dmalloc.h"*/
+
+#define DEBUG 1
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_postprocess_string ()
+@INPUT : s
+ options
+@OUTPUT : s (modified in place according to the flags)
+@RETURNS : (void)
+@DESCRIPTION: Make a pass over string s (which is modified in-place) to
+ optionally collapse whitespace according to BibTeX rules
+ (if the BTO_COLLAPSE bit in options is true).
+
+ Rules for collapsing whitespace are:
+ * whitespace at beginning/end of string is deleted
+ * within the string, each whitespace sequence is replaced by
+ a single space
+
+ Note that part of the work is done by the lexer proper,
+ namely conversion of tabs and newlines to spaces.
+@GLOBALS :
+@CALLS :
+@CREATED : originally in lex_auxiliary.c; moved here 1997/01/12
+@MODIFIED :
+@COMMENTS : this only collapses whitespace now -- rename it???
+-------------------------------------------------------------------------- */
+void
+bt_postprocess_string (char * s, ushort options)
+{
+   boolean collapse_whitespace;
+   char *dst;                     /* where the next kept char is written */
+   char *src;                     /* next char to examine; src >= dst */
+
+   if (s == NULL) return;         /* nothing to do without a string */
+
+#if DEBUG > 1
+   printf ("bt_postprocess_string: looking at >%s<\n", s);
+#endif
+
+   /* The only option that matters here is whitespace collapsing. */
+   collapse_whitespace = options & BTO_COLLAPSE;
+
+   dst = s;
+   src = s;
+
+   /* When collapsing, leading spaces are dropped outright. */
+   if (collapse_whitespace)
+   {
+      while (*src == ' ')
+         src++;
+   }
+
+   /*
+    * Copy the string down onto itself.  A space is kept only if the
+    * character before it was not a space; the second and later spaces
+    * of a run are stepped over without being copied.
+    */
+   for (;;)
+   {
+      if (*src == (char) 0)
+         break;
+
+      if (collapse_whitespace && src[0] == ' ' && src[-1] == ' ')
+      {
+         do
+         {
+            src++;
+         } while (*src == ' ');
+
+         if (*src == (char) 0)    /* the run went to end of string */
+            break;
+      }
+
+      *dst = *src;
+      dst++;
+      src++;
+   }
+   *dst = (char) 0;               /* terminate the (possibly shorter) result */
+
+   /*
+    * A trailing run of spaces has been reduced to exactly one space by
+    * the loop above; when collapsing, remove that one too.
+    */
+   if (collapse_whitespace && dst > s && dst[-1] == ' ')
+   {
+      dst--;
+      *dst = (char) 0;
+   }
+
+#if DEBUG > 1
+   printf (" transformed to >%s<\n", s);
+#endif
+
+} /* bt_postprocess_string */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_postprocess_value()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Post-processes a series of strings (compound value),
+ frequently found as the value of a "field = value" or "macro
+ = value" assignment. The actions taken here are governed by
+ the bits in 'options', but there are two distinct modes of
+ operation: pasting or not.
+
+ We paste strings if and only if the BTO_PASTE bit in options
+ is set and there are two or more simple values in the
+ compound value. In this case, the BTO_EXPAND bit must be set
+ (it would be very silly to paste together strings with
+ unexpanded macro names!), and we make two passes over the
+ data: one to postprocess individual strings and accumulate
+ the one big string, and a second to postprocess the big
+ string. In the first pass, the caller-supplied 'options'
+ variable is largely ignored; we will never collapse
+ whitespace in the individual strings. The caller's wishes
+ are fully respected when we make the final post-processing
+ pass over the concatenation of the individual strings,
+ though.
+
+ If we're not pasting strings, then the character of the
+ individual simple values will be preserved; macros might not
+ be expanded (depending on the BTO_EXPAND bit), numbers will
+ stay numbers, and strings will be post-processed
+ independently according to the 'options' variable. (Beware
+ -- this means you might collapse whitespace in individual
+ sub-strings, which would be bad if you intend to concatenate
+ them later in the BibTeX sense.)
+
+ The 'replace' parameter is used to govern whether the
+ existing strings in the AST should be replaced with their
+ post-processed versions. This can extend as far as
+ collapsing a series of simple values into a single BTAST_STRING
+ node, if we paste sub-strings together. If replace is FALSE,
+ the returned string is allocated here, and you must free() it
+ later.
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/10, GPW
+@MODIFIED : 1997/08/25, GPW: renamed from bt_postprocess_field(), and changed
+ to take the head of a list of simple values,
+ rather than the parent of that list
+-------------------------------------------------------------------------- */
+/*
+ * Post-process the list of simple values headed by 'value' (linked through
+ * ->right) according to 'options'.
+ *
+ *   value    head of the list; must be a BTAST_STRING, BTAST_NUMBER or
+ *            BTAST_MACRO node (anything else is reported via usage_error)
+ *   options  BTO_* bit flags (BTO_PASTE, BTO_EXPAND, BTO_CONVERT,
+ *            BTO_COLLAPSE are examined here)
+ *   replace  if true, write the processed text back into the AST
+ *
+ * Returns NULL if 'value' is NULL or the paste buffer cannot be allocated.
+ * Otherwise returns the pasted string (when pasting) or the string for the
+ * last simple value in the list (when not pasting).  With 'replace' false
+ * the result is always freshly allocated and the caller must free() it.
+ * With 'replace' true the result normally points at text now held by the
+ * AST; the one exception is the empty string produced for a value that
+ * yields no text, which is allocated here.
+ */
+char *
+bt_postprocess_value (AST * value, ushort options, boolean replace)
+{
+   AST *   simple_value;         /* current simple value */
+   boolean pasting;
+   ushort  string_opts;          /* what to do to individual strings */
+   int     tot_len;              /* total length of pasted string */
+   char *  new_string;           /* the string we will return */
+   boolean own_new;              /* is new_string heap memory that the
+                                    AST does not reference? */
+   char *  tmp_string;
+   boolean free_tmp;             /* should we free() tmp_string? */
+
+   if (value == NULL) return NULL;
+   if (value->nodetype != BTAST_STRING &&
+       value->nodetype != BTAST_NUMBER &&
+       value->nodetype != BTAST_MACRO)
+   {
+      usage_error ("bt_postprocess_value: invalid AST node (not a value)");
+   }
+
+
+   /*
+    * We will paste strings iff the user wants us to, and there are at least
+    * two simple values in the list headed by 'value'.
+    */
+
+   pasting = (options & BTO_PASTE) && (value->right);
+
+   /*
+    * If we're to concatenate (paste) sub-strings, we need to know the
+    * total length of them.  So make a pass over all the sub-strings
+    * (simple values), adding up their lengths.
+    */
+
+   tot_len = 0;                  /* these are out here to keep */
+   new_string = NULL;            /* gcc -Wall happy */
+   tmp_string = NULL;
+   own_new = FALSE;
+
+   if (pasting)
+   {
+      simple_value = value;
+      while (simple_value)
+      {
+         switch (simple_value->nodetype)
+         {
+            case BTAST_MACRO:
+               tot_len += bt_macro_length (simple_value->text);
+               break;
+            case BTAST_STRING:
+               tot_len += (simple_value->text)
+                  ? (strlen (simple_value->text)) : 0;
+               break;
+            case BTAST_NUMBER:
+               tot_len += (simple_value->text)
+                  ? (strlen (simple_value->text)) : 0;
+               break;
+            default:
+               internal_error ("simple value has bad nodetype (%d)",
+                               (int) simple_value->nodetype);
+         }
+         simple_value = simple_value->right;
+      }
+
+      /*
+       * Now allocate the buffer in which we'll accumulate the whole string.
+       * It is zero-filled, so it starts out as a valid empty string for
+       * the strcat() calls below.
+       */
+
+      new_string = (char *) calloc (tot_len+1, sizeof (char));
+      if (new_string == NULL)
+      {
+         /* strcat() onto a NULL buffer would crash; report and give up */
+         internal_error ("bt_postprocess_value: out of memory");
+         return NULL;
+      }
+   }
+
+
+   /*
+    * Before entering the main loop, figure out just what
+    * bt_postprocess_string() is supposed to do -- eg. if pasting strings,
+    * we should not (yet) collapse whitespace.  (That'll be done on the
+    * final, concatenated string -- assuming the caller put BTO_COLLAPSE in
+    * the options bitmap.)
+    */
+
+   if (pasting)
+   {
+      string_opts = options & ~BTO_COLLAPSE; /* turn off collapsing */
+   }
+   else
+   {
+      string_opts = options;     /* leave it alone */
+   }
+
+   /*
+    * Sanity check: if we continue blindly on, we might stupidly
+    * concatenate a macro name and a literal string.  So check for that.
+    * Converting numbers is superficial, but requiring that it be done
+    * keeps people honest.
+    *
+    * NOTE(review): as written this only fires when NEITHER BTO_CONVERT
+    * nor BTO_EXPAND is set, although the message says both are required.
+    * Left unchanged because tightening it could make existing callers
+    * fail -- confirm the intended rule before changing it.
+    */
+
+   if (pasting && ! (options & (BTO_CONVERT|BTO_EXPAND)))
+   {
+      usage_error ("bt_postprocess_value(): "
+                   "must convert numbers and expand macros "
+                   "when pasting substrings");
+   }
+
+   /*
+    * Now the main loop to process each string, and possibly tack it onto
+    * new_string.
+    */
+
+   simple_value = value;
+   while (simple_value)
+   {
+      tmp_string = NULL;
+      free_tmp = FALSE;
+
+      /*
+       * If this simple value is a macro and we're supposed to expand
+       * macros, then do so.  We also have to post-process the string
+       * returned from the macro table, because they're stored there
+       * without whitespace collapsed; if we're supposed to be doing that
+       * to the current value (and we're not pasting), this is where it
+       * will get done.
+       */
+      if (simple_value->nodetype == BTAST_MACRO && (options & BTO_EXPAND))
+      {
+         tmp_string = bt_macro_text (simple_value->text,
+                                     simple_value->filename,
+                                     simple_value->line);
+         if (tmp_string != NULL)
+         {
+            /* work on a private copy, never on the macro table's text */
+            tmp_string = strdup (tmp_string);
+            free_tmp = TRUE;
+            bt_postprocess_string (tmp_string, string_opts);
+         }
+
+         if (replace)
+         {
+            simple_value->nodetype = BTAST_STRING;
+            if (simple_value->text)
+               free (simple_value->text);
+            simple_value->text = tmp_string;
+            free_tmp = FALSE;    /* mustn't free, it's now in the AST */
+         }
+      }
+
+      /*
+       * If the current simple value is a literal string, then just
+       * post-process it.  This will be done in-place if 'replace' is
+       * true, otherwise a copy of the string will be post-processed.
+       */
+      else if (simple_value->nodetype == BTAST_STRING && simple_value->text)
+      {
+         if (replace)
+         {
+            tmp_string = simple_value->text;
+         }
+         else
+         {
+            tmp_string = strdup (simple_value->text);
+            free_tmp = TRUE;
+         }
+
+         bt_postprocess_string (tmp_string, string_opts);
+      }
+
+      /*
+       * Finally, if the current simple value is a number, change it to a
+       * string (depending on options) and get its value.  We generally
+       * treat strings and numbers as equivalent, except of course numbers
+       * aren't post-processed -- there can't be any whitespace in them!
+       * The BTO_CONVERT option is mainly a sop to my strong-typing
+       * tendencies.
+       */
+      if (simple_value->nodetype == BTAST_NUMBER)
+      {
+         if (replace && (options & BTO_CONVERT))
+            simple_value->nodetype = BTAST_STRING;
+
+         if (simple_value->text)
+         {
+            if (replace)
+               tmp_string = simple_value->text;
+            else
+            {
+               tmp_string = strdup (simple_value->text);
+               free_tmp = TRUE;
+            }
+         }
+      }
+
+      if (pasting)
+      {
+         if (tmp_string)
+            strcat (new_string, tmp_string);
+         if (free_tmp)
+            free (tmp_string);
+      }
+      else
+      {
+         /*
+          * Not pasting: the string for the current simple value becomes
+          * the return value, superseding the one from the previous
+          * iteration.  If that earlier string was allocated here and is
+          * not referenced by the AST, release it now -- otherwise every
+          * simple value but the last would be leaked.
+          */
+         if (own_new)
+            free (new_string);
+
+         /*
+          * N.B. if tmp_string is NULL (eg. from a single undefined macro)
+          * we make a strdup() of the empty string -- this is so we can
+          * safely free() the string returned from this function
+          * at some future point.
+          */
+         if (tmp_string != NULL)
+         {
+            new_string = tmp_string;
+            own_new = free_tmp;
+         }
+         else
+         {
+            new_string = strdup ("");
+            own_new = TRUE;
+         }
+      }
+
+      simple_value = simple_value->right;
+   }
+
+   if (pasting)
+   {
+      int len;
+
+      len = strlen (new_string);
+      assert (len <= tot_len);   /* hope we alloc'd enough! */
+
+      bt_postprocess_string (new_string, options);
+
+      /*
+       * If replacing data in the AST, delete all but the first simple
+       * value in the list, and replace the text of the first one with
+       * new_string.
+       */
+
+      if (replace)
+      {
+         assert (value->right != NULL); /* there has to be > 1 simple value! */
+         zzfree_ast (value->right);     /* free from second simple value on */
+         value->right = NULL;           /* remind ourselves they're gone */
+         if (value->text)               /* free text of first simple value */
+            free (value->text);
+         value->text = new_string;      /* and replace it with concatenation */
+      }
+   }
+
+   return new_string;
+
+} /* bt_postprocess_value() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_postprocess_field()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Postprocesses all the strings in a single "field = value"
+ assignment subtree. Just checks that 'field' does indeed
+ point to an BTAST_FIELD node (presumably the parent of a list
+ of simple values), downcases the field name, and calls
+ bt_postprocess_value() on the value.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/08/25, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+char *
+bt_postprocess_field (AST * field, ushort options, boolean replace)
+{
+   char * result = NULL;          /* stays NULL when there is no field */
+
+   if (field != NULL)
+   {
+      if (field->nodetype != BTAST_FIELD)
+      {
+         usage_error ("bt_postprocess_field: invalid AST node (not a field)");
+      }
+
+      /* field names are normalised to lower case in place */
+      strlwr (field->text);
+
+      /* the field's children are the simple values making up its value */
+      result = bt_postprocess_value (field->down, options, replace);
+   }
+
+   return result;
+
+} /* bt_postprocess_field() */
+
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_postprocess_entry()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Postprocesses all the strings in an entry: collapse whitespace,
+ concatenate substrings, expands macros, and whatnot.
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/10, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_postprocess_entry (AST * top, ushort options)
+{
+   AST *node;
+
+   if (top == NULL) return;       /* no entry, nothing to do */
+   if (top->nodetype != BTAST_ENTRY)
+      usage_error ("bt_postprocess_entry: "
+                   "invalid node type (not entry root)");
+   strlwr (top->text);            /* entry type is lower-cased in place */
+
+   node = top->down;
+   if (node == NULL) return;      /* entry has no children */
+
+   /* A leading key node is not a field or value: step over it. */
+   if (node->nodetype == BTAST_KEY)
+      node = node->right;
+
+   if (top->metatype == BTE_REGULAR || top->metatype == BTE_MACRODEF)
+   {
+      /*
+       * The remaining children are fields.  For a macro definition each
+       * processed field is also handed to bt_add_macro_value(), unless
+       * the caller set BTO_NOSTORE.
+       */
+      for ( ; node != NULL; node = node->right)
+      {
+         bt_postprocess_field (node, options, TRUE);
+         if (top->metatype == BTE_MACRODEF && ! (options & BTO_NOSTORE))
+            bt_add_macro_value (node, options);
+      }
+   }
+   else if (top->metatype == BTE_COMMENT || top->metatype == BTE_PREAMBLE)
+   {
+      /* The remaining children are processed directly as a value list. */
+      bt_postprocess_value (node, options, TRUE);
+   }
+   else
+   {
+      internal_error ("bt_postprocess_entry: unknown entry metatype (%d)",
+                      (int) top->metatype);
+   }
+
+} /* bt_postprocess_entry() */
diff --git a/src/translators/btparse/prototypes.h b/src/translators/btparse/prototypes.h
new file mode 100644
index 0000000..88beada
--- /dev/null
+++ b/src/translators/btparse/prototypes.h
@@ -0,0 +1,47 @@
+/* ------------------------------------------------------------------------
+@NAME : prototypes.h
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Prototype declarations for functions from various places.
+ Only functions that are private to the library (but shared
+ between files within the library) are declared here.
+ Functions that are "exported from" the library (ie. usable
+ by and expected to be used by library user) are declared in
+ btparse.h.
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/12, Greg Ward
+@MODIFIED :
+@VERSION : $Id: prototypes.h,v 1.14 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+#ifndef PROTOTYPES_H
+#define PROTOTYPES_H
+
+#include <stdio.h>
+#include "btparse.h" /* for types */
+
+/* util.c */
+#if !HAVE_STRLWR
+char *strlwr (char *s);
+#endif
+#if !HAVE_STRUPR
+char *strupr (char *s);
+#endif
+
+/* macros.c */
+void init_macros (void);
+void done_macros (void);
+
+/* bibtex_ast.c */
+void dump_ast (char *msg, AST *root);
+
+#endif /* PROTOTYPES_H */
diff --git a/src/translators/btparse/scan.c b/src/translators/btparse/scan.c
new file mode 100644
index 0000000..b9899e4
--- /dev/null
+++ b/src/translators/btparse/scan.c
@@ -0,0 +1,615 @@
+
+/* parser.dlg -- DLG Description of scanner
+ *
+ * Generated from: bibtex.g
+ *
+ * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994
+ * Purdue University Electrical Engineering
+ * With AHPCRC, University of Minnesota
+ * ANTLR Version 1.33
+ */
+
+#include <stdio.h>
+#define ANTLR_VERSION 133
+
+#define ZZCOL
+#define USER_ZZSYN
+
+#include "btconfig.h"
+#include "btparse.h"
+#include "attrib.h"
+#include "lex_auxiliary.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+
+extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */
+#include "antlr.h"
+#include "ast.h"
+#include "tokens.h"
+#include "dlgdef.h"
+LOOKAHEAD
+/*
+ * Scanner error action.  This is entry 0 of actions[], which is what
+ * accepts[] selects for states that have no action of their own.  It
+ * reports "invalid token" through the zzerr function pointer and then
+ * invokes zzadvance() and zzskip() (presumably to step past the
+ * offending character and return no token for it -- see the DLG docs).
+ */
+void zzerraction()
+{
+ (*zzerr)("invalid token");
+ zzadvance();
+ zzskip();
+}
+/*
+ * D L G tables
+ *
+ * Generated from: parser.dlg
+ *
+ * 1989-1994 by Will Cohen, Terence Parr, and Hank Dietz
+ * Purdue University Electrical Engineering
+ * DLG Version 1.33
+ */
+
+#include "mode.h"
+
+
+
+/*
+ * Actions for the first automaton (the states starting at dfa_base[0]).
+ * Each one stores a token code in NLA and, where present, calls a helper
+ * declared in lex_auxiliary.h.  Generated by DLG from bibtex.g -- change
+ * the grammar rather than editing these by hand.
+ */
+
+/* Token code 1; no helper.  Reached on character class 0 (c == -1). */
+static void act1()
+{
+ NLA = 1;
+ }
+
+
+/* '@' seen: token AT, then at_sign(). */
+static void act2()
+{
+ NLA = AT;
+ at_sign ();
+ }
+
+
+/* Newline: token code 3, then newline(). */
+static void act3()
+{
+ NLA = 3;
+ newline ();
+ }
+
+
+/* Text starting at '%' and ending with a newline: token COMMENT, then comment(). */
+static void act4()
+{
+ NLA = COMMENT;
+ comment ();
+ }
+
+
+/* Run of tab / CR / space: token code 5, then zzskip(). */
+static void act5()
+{
+ NLA = 5;
+ zzskip ();
+ }
+
+
+/* Anything else at this level: token code 6, then toplevel_junk(). */
+static void act6()
+{
+ NLA = 6;
+ toplevel_junk ();
+ }
+
+/*
+ * Character-class table for the first automaton.  ZZSHIFT(c) reads entry
+ * [1+c], so entry 0 is for c == -1 and entries 1..256 are for character
+ * codes 0..255.  Classes as laid out below:
+ *   0 = c == -1        1 = '@'           2 = '\n'
+ *   3 = '%'            4 = tab, CR, space
+ *   5 = everything else
+ */
+static unsigned char shift0[257] = {
+ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 4, 2, 5, 5, 4, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 4, 5, 5, 5, 5, 3, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 1, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5
+};
+
+
+/*
+ * Actions for the second automaton (the states starting at dfa_base[1]).
+ * Each one stores a token code in NLA and, where present, calls a helper
+ * declared in lex_auxiliary.h.  Generated by DLG from bibtex.g.
+ */
+
+/* Token code 1; no helper.  Reached on character class 0 (c == -1). */
+static void act7()
+{
+ NLA = 1;
+ }
+
+
+/* Newline: token code 7, then newline(). */
+static void act8()
+{
+ NLA = 7;
+ newline ();
+ }
+
+
+/* Text starting at '%' and ending with a newline: token COMMENT, then comment(). */
+static void act9()
+{
+ NLA = COMMENT;
+ comment ();
+ }
+
+
+/* Run of tab / CR / space: token code 8, then zzskip(). */
+static void act10()
+{
+ NLA = 8;
+ zzskip ();
+ }
+
+
+/* Run of digits: token NUMBER. */
+static void act11()
+{
+ NLA = NUMBER;
+ }
+
+
+/* Run of name characters (may begin with digits): token NAME, then name(). */
+static void act12()
+{
+ NLA = NAME;
+ name ();
+ }
+
+
+/* '{': token LBRACE, then lbrace(). */
+static void act13()
+{
+ NLA = LBRACE;
+ lbrace ();
+ }
+
+
+/* '}': token RBRACE, then rbrace(). */
+static void act14()
+{
+ NLA = RBRACE;
+ rbrace ();
+ }
+
+
+/* '(': token ENTRY_OPEN, then lparen(). */
+static void act15()
+{
+ NLA = ENTRY_OPEN;
+ lparen ();
+ }
+
+
+/* ')': token ENTRY_CLOSE, then rparen(). */
+static void act16()
+{
+ NLA = ENTRY_CLOSE;
+ rparen ();
+ }
+
+
+/* '=': token EQUALS. */
+static void act17()
+{
+ NLA = EQUALS;
+ }
+
+
+/* '#': token HASH. */
+static void act18()
+{
+ NLA = HASH;
+ }
+
+
+/* ',': token COMMA. */
+static void act19()
+{
+ NLA = COMMA;
+ }
+
+
+/* '"': token code 18, then start_string ('"'). */
+static void act20()
+{
+ NLA = 18;
+ start_string ('"');
+ }
+
+/*
+ * Character-class table for the second automaton.  ZZSHIFT(c) reads entry
+ * [1+c], so entry 0 is for c == -1 and entries 1..256 are for character
+ * codes 0..255.  Classes as laid out below:
+ *   0 = c == -1     1 = '\n'     2 = '%'      3 = tab, CR, space
+ *   4 = '0'..'9'    5 = characters allowed in names
+ *   6 = '{'         7 = '}'      8 = '('      9 = ')'
+ *  10 = '='        11 = '#'     12 = ','     13 = '"'
+ *  14 = everything else
+ */
+static unsigned char shift1[257] = {
+ 0, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 3, 1, 14, 14, 3, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 3, 5, 13, 11, 5, 2, 5,
+ 14, 8, 9, 5, 5, 12, 5, 5, 5, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
+ 5, 5, 10, 5, 5, 14, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 14, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 6, 5, 7, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14
+};
+
+
+/*
+ * Actions for the third automaton (the states starting at dfa_base[2]).
+ * Each one stores a token code in NLA and, where present, calls a helper
+ * declared in lex_auxiliary.h or a DLG macro.  Generated by DLG from
+ * bibtex.g.
+ */
+
+/* Token code 1; no helper.  Reached on character class 0 (c == -1). */
+static void act21()
+{
+ NLA = 1;
+ }
+
+
+/* Newline (plus any following tab/CR/other chars per st29/st37):
+ * token code 19, then check_runaway_string(). */
+static void act22()
+{
+ NLA = 19;
+ check_runaway_string ();
+ }
+
+
+/* Tab or CR: token code 20, then zzreplchar (' ') and zzmore(). */
+static void act23()
+{
+ NLA = 20;
+ zzreplchar (' '); zzmore ();
+ }
+
+
+/* '{': token code 21, then open_brace(). */
+static void act24()
+{
+ NLA = 21;
+ open_brace ();
+ }
+
+
+/* '}': token code 22, then close_brace(). */
+static void act25()
+{
+ NLA = 22;
+ close_brace ();
+ }
+
+
+/* '(': token code 23, then lparen_in_string(). */
+static void act26()
+{
+ NLA = 23;
+ lparen_in_string ();
+ }
+
+
+/* ')': token code 24, then rparen_in_string(). */
+static void act27()
+{
+ NLA = 24;
+ rparen_in_string ();
+ }
+
+
+/* '"': token STRING, then quote_in_string(). */
+static void act28()
+{
+ NLA = STRING;
+ quote_in_string ();
+ }
+
+
+/* Any other run of characters: token code 26, then zzmore(). */
+static void act29()
+{
+ NLA = 26;
+ zzmore ();
+ }
+
+/*
+ * Character-class table for the third automaton.  ZZSHIFT(c) reads entry
+ * [1+c], so entry 0 is for c == -1 and entries 1..256 are for character
+ * codes 0..255.  Classes as laid out below:
+ *   0 = c == -1     1 = '\n'     2 = tab, CR
+ *   3 = everything else (including space)
+ *   4 = '{'         5 = '}'      6 = '('      7 = ')'
+ *   8 = '"'         9 = backslash
+ */
+static unsigned char shift2[257] = {
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 1, 3, 3, 2, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 8, 3, 3, 3, 3,
+ 3, 6, 7, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 9, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 4, 3, 5, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3
+};
+
+/*
+ * DFA transition tables (DLG-generated).  There are DfaStates (38) real
+ * states, numbered 0..37; each stN[] below is the transition row for
+ * state N, indexed by character class.  The value 38 (== DfaStates) is
+ * not a real state -- presumably it marks "no transition"; see dlgauto.h.
+ */
+#define DfaStates 38
+typedef unsigned char DfaState;
+
+/* States 0..9: rows of 7 entries, used with the shift0 classes. */
+static DfaState st0[7] = {
+ 1, 2, 3, 4, 5, 6, 38
+};
+
+static DfaState st1[7] = {
+ 38, 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st2[7] = {
+ 38, 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st3[7] = {
+ 38, 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st4[7] = {
+ 38, 7, 8, 9, 7, 9, 38
+};
+
+static DfaState st5[7] = {
+ 38, 38, 38, 38, 5, 38, 38
+};
+
+static DfaState st6[7] = {
+ 38, 38, 38, 6, 38, 6, 38
+};
+
+static DfaState st7[7] = {
+ 38, 7, 8, 7, 7, 7, 38
+};
+
+static DfaState st8[7] = {
+ 38, 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st9[7] = {
+ 38, 7, 8, 9, 7, 9, 38
+};
+
+/*
+ * States 10..26: rows of 16 entries, used with the shift1 classes.
+ * State 10 is the entry point (dfa_base[1]); 38 is the out-of-range
+ * "no such state" value.
+ */
+static DfaState st10[16] = {
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 38, 38
+};
+
+static DfaState st11[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st12[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st13[16] = {
+ 38, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 38
+};
+
+static DfaState st14[16] = {
+ 38, 38, 38, 14, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st15[16] = {
+ 38, 38, 38, 38, 15, 16, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st16[16] = {
+ 38, 38, 38, 38, 16, 16, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st17[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st18[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st19[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st20[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st21[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st22[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st23[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st24[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st25[16] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38
+};
+
+static DfaState st26[16] = {
+ 38, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 38
+};
+
+/*
+ * States 27..37: rows of 11 entries, used with the shift2 classes.
+ * State 27 is the entry point (dfa_base[2]); 38 is the out-of-range
+ * "no such state" value.
+ */
+static DfaState st27[11] = {
+ 28, 29, 30, 31, 32, 33, 34, 35, 36, 31,
+ 38
+};
+
+static DfaState st28[11] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+static DfaState st29[11] = {
+ 38, 38, 37, 37, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+static DfaState st30[11] = {
+ 38, 38, 31, 31, 38, 38, 38, 38, 38, 31,
+ 38
+};
+
+static DfaState st31[11] = {
+ 38, 38, 31, 31, 38, 38, 38, 38, 38, 31,
+ 38
+};
+
+static DfaState st32[11] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+static DfaState st33[11] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+static DfaState st34[11] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+static DfaState st35[11] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+static DfaState st36[11] = {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+static DfaState st37[11] = {
+ 38, 38, 37, 37, 38, 38, 38, 38, 38, 38,
+ 38
+};
+
+
+/*
+ * Master table of transition rows: dfa[N] is the row for state N
+ * (0..37).  Rows have different lengths depending on which automaton
+ * the state belongs to (7, 16 or 11 entries).
+ */
+DfaState *dfa[38] = {
+ st0,
+ st1,
+ st2,
+ st3,
+ st4,
+ st5,
+ st6,
+ st7,
+ st8,
+ st9,
+ st10,
+ st11,
+ st12,
+ st13,
+ st14,
+ st15,
+ st16,
+ st17,
+ st18,
+ st19,
+ st20,
+ st21,
+ st22,
+ st23,
+ st24,
+ st25,
+ st26,
+ st27,
+ st28,
+ st29,
+ st30,
+ st31,
+ st32,
+ st33,
+ st34,
+ st35,
+ st36,
+ st37
+};
+
+
+/*
+ * For each state 0..37, plus the extra value 38, the index into
+ * actions[] of the action attached to that state.  An entry of 0 selects
+ * actions[0], i.e. zzerraction.
+ */
+DfaState accepts[39] = {
+ 0, 1, 2, 3, 6, 5, 6, 0, 4, 6,
+ 0, 7, 8, 0, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 9, 0, 0, 21, 22,
+ 23, 29, 24, 25, 26, 27, 28, 22, 0
+};
+
+/*
+ * Action dispatch table, indexed by the values stored in accepts[].
+ * Slot 0 is the error action; slot K (1..29) is actK.
+ */
+void (*actions[30])() = {
+ zzerraction,
+ act1,
+ act2,
+ act3,
+ act4,
+ act5,
+ act6,
+ act7,
+ act8,
+ act9,
+ act10,
+ act11,
+ act12,
+ act13,
+ act14,
+ act15,
+ act16,
+ act17,
+ act18,
+ act19,
+ act20,
+ act21,
+ act22,
+ act23,
+ act24,
+ act25,
+ act26,
+ act27,
+ act28,
+ act29
+};
+
+/*
+ * One entry per automaton (three in all).  The values 0, 10 and 27 are
+ * the states at which the transition-row width changes above, so these
+ * are presumably the start states of the three automata -- see dlgauto.h
+ * for how the table is consumed.
+ */
+static DfaState dfa_base[] = {
+ 0,
+ 10,
+ 27
+};
+
+/*
+ * Character-class table for each automaton; ZZSHIFT() selects one with
+ * zzauto as the index.
+ */
+static unsigned char *b_class_no[] = {
+ shift0,
+ shift1,
+ shift2
+};
+
+
+
+/*
+ * ZZSHIFT(c): character class of c in the automaton selected by zzauto.
+ * The +1 offset lets c == -1 index entry 0 of the class tables.
+ * NOTE(review): 'c' is not parenthesised in the expansion, so the
+ * argument should be a simple expression.
+ * MAX_MODE matches the three entries of b_class_no[] / dfa_base[].
+ */
+#define ZZSHIFT(c) (b_class_no[zzauto][1+c])
+#define MAX_MODE 3
+#include "dlgauto.h"
diff --git a/src/translators/btparse/stdpccts.h b/src/translators/btparse/stdpccts.h
new file mode 100644
index 0000000..e232634
--- /dev/null
+++ b/src/translators/btparse/stdpccts.h
@@ -0,0 +1,31 @@
+#ifndef STDPCCTS_H
+#define STDPCCTS_H
+/*
+ * stdpccts.h -- P C C T S I n c l u d e
+ *
+ * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994
+ * Purdue University Electrical Engineering
+ * With AHPCRC, University of Minnesota
+ * ANTLR Version 1.33
+ */
+#include <stdio.h>
+#define ANTLR_VERSION 133
+
+#define ZZCOL
+#define USER_ZZSYN
+
+#include "btparse.h"
+#include "attrib.h"
+#include "lex_auxiliary.h"
+#include "error.h"
+/*#include "my_dmalloc.h"*/
+
+extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */
+#define GENAST
+#define zzSET_SIZE 4
+#include "antlr.h"
+#include "ast.h"
+#include "tokens.h"
+#include "dlgdef.h"
+#include "mode.h"
+#endif
diff --git a/src/translators/btparse/string_util.c b/src/translators/btparse/string_util.c
new file mode 100644
index 0000000..3713608
--- /dev/null
+++ b/src/translators/btparse/string_util.c
@@ -0,0 +1,695 @@
+/* ------------------------------------------------------------------------
+@NAME : string_util.c
+@DESCRIPTION: Various string-processing utility functions:
+ bt_purify_string()
+ bt_change_case()
+
+ and their helpers:
+ foreign_letter()
+ purify_special_char()
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/10/19, Greg Ward
+@MODIFIED : 1997/11/25, GPW: renamed from purify.c to string_util.c
+ added bt_change_case() and friends
+@VERSION : $Id: string_util.c,v 1.10 1999/10/28 22:50:28 greg Rel $
+-------------------------------------------------------------------------- */
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include "error.h"
+#include "btparse.h"
+#include "bt_debug.h"
+
+
+/*
+ * These definitions should be fixed to be consistent with HTML
+ * entities, just for fun. And perhaps I should add entries for
+ * accented letters (at least those supported by TeX and HTML).
+ *
+ * The enumerator values are used directly as indices into uc_version[]
+ * and lc_version[], so the order here must match the order of the
+ * entries in those two tables.
+ */
+typedef enum
+{
+ L_OTHER, /* not a "foreign" letter */
+ L_OSLASH_L, /* Eastern European {\o} */
+ L_OSLASH_U, /* {\O} */
+ L_LSLASH_L, /* {\l} */
+ L_LSLASH_U, /* {\L} */
+ L_OELIG_L, /* Latin {\oe} ligature */
+ L_OELIG_U, /* {\OE} */
+ L_AELIG_L, /* {\ae} ligature */
+ L_AELIG_U, /* {\AE} */
+ L_SSHARP_L, /* German "sharp s" {\ss} */
+ L_SSHARP_U, /* {\SS} */
+ L_ACIRCLE_L, /* Nordic {\aa} */
+ L_ACIRCLE_U, /* {\AA} */
+ L_INODOT_L, /* undotted i: {\i} */
+ L_JNODOT_L /* {\j} */
+} bt_letter;
+
+
+/*
+ * Uppercase replacement text for each bt_letter, indexed by the enum
+ * value (so the entries must stay in bt_letter order).  Read by
+ * convert_special_char(), which requires each replacement to be no longer
+ * than the control sequence it replaces (backslash included).
+ */
+static const char * uc_version[] =
+{
+ NULL, /* L_OTHER */
+ "\\O", /* L_OSLASH_L */
+ "\\O", /* L_OSLASH_U */
+ "\\L", /* L_LSLASH_L */
+ "\\L", /* L_LSLASH_U */
+ "\\OE", /* L_OELIG_L */
+ "\\OE", /* L_OELIG_U */
+ "\\AE", /* L_AELIG_L */
+ "\\AE", /* L_AELIG_U */
+ "SS", /* L_SSHARP_L -- for LaTeX 2.09 */
+ "\\SS", /* L_SSHARP_U */
+ "\\AA", /* L_ACIRCLE_L */
+ "\\AA", /* L_ACIRCLE_U */
+ "I", /* L_INODOT_L */
+ "J" /* L_JNODOT_L */
+};
+
+/*
+ * Lowercase replacement text for each bt_letter, indexed by the enum
+ * value (so the entries must stay in bt_letter order).  Read by
+ * convert_special_char().
+ */
+static const char * lc_version[] =
+{
+ NULL, /* L_OTHER */
+ "\\o", /* L_OSLASH_L */
+ "\\o", /* L_OSLASH_U */
+ "\\l", /* L_LSLASH_L */
+ "\\l", /* L_LSLASH_U */
+ "\\oe", /* L_OELIG_L */
+ "\\oe", /* L_OELIG_U */
+ "\\ae", /* L_AELIG_L */
+ "\\ae", /* L_AELIG_U */
+ "\\ss", /* L_SSHARP_L */
+ "\\ss", /* L_SSHARP_U */
+ "\\aa", /* L_ACIRCLE_L */
+ "\\aa", /* L_ACIRCLE_U */
+ "\\i", /* L_INODOT_L */
+ "\\j" /* L_JNODOT_L */
+};
+
+
+
+/* ------------------------------------------------------------------------
+@NAME       : foreign_letter()
+@INPUT      : str   - string holding the control sequence
+              start - index of the first character after the backslash
+              stop  - index one past the last character of the sequence
+@OUTPUT     : letter - if non-NULL, receives the bt_letter that was
+                       recognized, or L_OTHER if none was
+@RETURNS    : TRUE if str[start..stop-1] names a foreign letter,
+              FALSE otherwise
+@DESCRIPTION: Determines if a character sequence is one of (La)TeX's
+              "foreign letter" control sequences: o, l, oe, ae, aa, ss
+              and their uppercase versions, plus the undotted i and j.
+              Only sequences of exactly one or two characters can match;
+              the case of both characters must agree (so "oE" is not a
+              foreign letter), and every other sequence yields FALSE.
+              Characters of str are only read inside [start, stop).
+@CALLS      :
+@CALLERS    : purify_special_char(), convert_special_char()
+@CREATED    : 1997/10/19, GPW
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+static boolean
+foreign_letter (char *str, int start, int stop, bt_letter * letter)
+{
+   char      c1, c2;
+   bt_letter dummy;
+
+   /*
+    * This is written for speed, not flexibility -- adding new foreign
+    * letters would be trying and vexatious.
+    *
+    * N.B. my gold standard list of foreign letters is Kopka and Daly's
+    * *A Guide to LaTeX 2e*, section 2.5.6.
+    */
+
+   if (letter == NULL)                  /* so we can assign to *letter */
+      letter = &dummy;                  /* without compunctions */
+   *letter = L_OTHER;                   /* assume not a "foreign" letter */
+
+   switch (stop - start)
+   {
+      case 1:                           /* one-character control sequences */
+         c1 = str[start];
+         switch (c1)
+         {
+            case 'o': *letter = L_OSLASH_L; return TRUE;
+            case 'O': *letter = L_OSLASH_U; return TRUE;
+            case 'l': *letter = L_LSLASH_L; return TRUE;
+            case 'L': *letter = L_LSLASH_U; return TRUE;
+            case 'i': *letter = L_INODOT_L; return TRUE;
+            case 'j': *letter = L_JNODOT_L; return TRUE;
+            default:  return FALSE;
+         }
+
+      case 2:                           /* two character control sequences */
+         c1 = str[start];               /* (\oe, \ae, \aa, and \ss) */
+         c2 = str[start+1];
+         switch (c1)
+         {
+            /* every case returns: no falling through into the next letter */
+            case 'o':
+               if (c2 == 'e') { *letter = L_OELIG_L; return TRUE; }
+               return FALSE;
+            case 'O':
+               if (c2 == 'E') { *letter = L_OELIG_U; return TRUE; }
+               return FALSE;
+
+            /* BibTeX 0.99 does not handle \aa and \AA -- but I do!*/
+            case 'a':
+               if (c2 == 'e') { *letter = L_AELIG_L; return TRUE; }
+               if (c2 == 'a') { *letter = L_ACIRCLE_L; return TRUE; }
+               return FALSE;
+            case 'A':
+               if (c2 == 'E') { *letter = L_AELIG_U; return TRUE; }
+               if (c2 == 'A') { *letter = L_ACIRCLE_U; return TRUE; }
+               return FALSE;
+
+            /* uppercase sharp-s -- new with LaTeX 2e (so far all I do
+             * is recognize it as a "foreign" letter)
+             */
+            case 's':
+               if (c2 == 's') { *letter = L_SSHARP_L; return TRUE; }
+               return FALSE;
+            case 'S':
+               if (c2 == 'S') { *letter = L_SSHARP_U; return TRUE; }
+               return FALSE;
+
+            /* any other two-letter control sequence (\it, \em, ...) */
+            default:
+               return FALSE;
+         }
+
+      default:                          /* wrong length for a foreign letter */
+         return FALSE;
+   } /* switch on length of control sequence */
+
+   return FALSE;                        /* not reached; keeps compilers happy */
+
+} /* foreign_letter */
+
+
+/* ------------------------------------------------------------------------
+@NAME       : purify_special_char()
+@INPUT      : str        - the string being purified in place
+              *src, *dst - indices into str for reading and writing
+@OUTPUT     : *src - updated to index the closing brace of the special
+                     char (or the terminating NUL if braces are
+                     unbalanced)
+              *dst - updated to index the next available spot
+                     for copying text to
+@RETURNS    :
+@DESCRIPTION: "Purifies" a BibTeX special character.  On input, str[*src]
+              must be the opening brace of a special character and the
+              character immediately following it must be a backslash
+              (both are asserted).  The opening brace and backslash are
+              skipped; if the control sequence is one of LaTeX's foreign
+              letter sequences (as determined by foreign_letter()), its
+              one or two letters are copied to the output, the second one
+              downcased.  Otherwise the control sequence is skipped.  In
+              either case, text after the control sequence is either
+              copied (alphabetic characters) or skipped (anything else,
+              including hyphens, ties, and digits).
+
+              A string that ends right after the backslash is handled
+              without reading past its terminator.
+@CALLS      : foreign_letter()
+@CALLERS    : bt_purify_string()
+@CREATED    : 1997/10/19, GPW
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+static void
+purify_special_char (char *str, int * src, int * dst)
+{
+   int depth;
+   int peek;
+
+   assert (str[*src] == '{' && str[*src + 1] == '\\');
+   depth = 1;
+
+   *src += 2;                   /* jump to start of control sequence */
+   peek = *src;                 /* scan to end of control sequence */
+   while (isalpha ((unsigned char) str[peek]))
+      peek++;
+   if (peek == *src && str[peek] != 0)  /* single-char, non-alpha control */
+      peek++;                           /* sequence (eg. {\'e}) -- but   */
+                                        /* never step over the NUL       */
+
+   /* an empty control sequence (string ended) cannot be a foreign letter */
+   if (peek > *src && foreign_letter (str, *src, peek, NULL))
+   {
+      assert (peek - *src == 1 || peek - *src == 2);
+      str[(*dst)++] = str[(*src)++];    /* copy first char */
+      if (*src < peek)                  /* copy second char, downcasing */
+         str[(*dst)++] = tolower ((unsigned char) str[(*src)++]);
+   }
+   else                         /* not a foreign letter -- skip */
+   {                            /* the control sequence entirely */
+      *src = peek;
+   }
+
+   while (str[*src])
+   {
+      switch (str[*src])
+      {
+         case '{':
+            depth++;
+            (*src)++;
+            break;
+         case '}':
+            depth--;
+            if (depth == 0) return;     /* done with special char */
+            (*src)++;
+            break;
+         default:
+            if (isalpha ((unsigned char) str[*src]))
+               str[(*dst)++] = str[(*src)++];   /* copy alphabetic chars */
+            else
+               (*src)++;                        /* skip everything else */
+      }
+   }
+
+   /*
+    * If we get here, we have unbalanced braces -- the '}' case should
+    * always hit a depth == 0 point if braces are balanced.  No warning,
+    * though, because a) BibTeX doesn't warn about purifying unbalanced
+    * strings, and b) we (should have) already warned about it in the
+    * lexer.
+    */
+
+} /* purify_special_char() */
+
+
+/* ------------------------------------------------------------------------
+@NAME       : bt_purify_string()
+@INOUT      : string  - NUL-terminated string, modified in place
+@INPUT      : options - currently unused
+@OUTPUT     :
+@RETURNS    :
+@DESCRIPTION: "Purifies" a BibTeX string.  This consists of copying
+              alphanumeric characters, converting hyphens and ties to
+              space, copying spaces, and skipping everything else.  (Well,
+              almost -- special characters are handled specially, of
+              course.  Basically, accented letters have the control
+              sequence skipped, while foreign letters have the control
+              sequence preserved in a reasonable manner.  See
+              purify_special_char() for details.)
+
+              The result is never longer than the input, which is why
+              the work can be done in place.
+@CALLS      : purify_special_char()
+@CALLERS    :
+@CREATED    : 1997/10/19, GPW
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+void
+bt_purify_string (char * string, ushort options)
+{
+   int    src,                  /* both indices into string */
+          dst;
+   int    depth;                /* brace depth in string */
+   size_t orig_len;             /* only used to sanity-check the result */
+
+   /*
+    * Since purification always copies or deletes chars, the purified
+    * text will be no longer than the original -- so nothing fancy is
+    * required to put an upper bound on its eventual size.
+    */
+
+   depth = 0;
+   src = 0;
+   dst = 0;
+   orig_len = strlen (string);
+
+   DBG_ACTION (1, printf ("bt_purify_string(): input = %p (%s)\n",
+                          string, string));
+
+   while (string[src] != (char) 0)
+   {
+      DBG_ACTION (2, printf (" next: >%c<: ", string[src]));
+      switch (string[src])
+      {
+         case '~':              /* "separator" characters -- */
+         case '-':              /* replaced with space */
+         case ' ':              /* and copy an actual space */
+            string[dst++] = ' ';
+            src++;
+            DBG_ACTION (2, printf ("replacing with space"));
+            break;
+         case '{':
+            if (depth == 0 && string[src+1] == '\\')
+            {
+               DBG_ACTION (2, printf ("special char found"));
+               /* leaves src on the closing brace, which is consumed by
+                * the '}' case on the next iteration */
+               purify_special_char (string, &src, &dst);
+            }
+            else
+            {
+               DBG_ACTION (2, printf ("ordinary open brace"));
+               src++;
+            }
+            depth++;
+            break;
+         case '}':
+            DBG_ACTION (2, printf ("close brace"));
+            depth--;
+            src++;
+            break;
+         default:
+            /* cast: <ctype.h> functions need an unsigned char value */
+            if (isalnum ((unsigned char) string[src]))
+            {
+               DBG_ACTION (2, printf ("alphanumeric -- copying"));
+               string[dst++] = string[src++];   /* copy it */
+            }
+            else                /* anything else -- skip it */
+            {
+               DBG_ACTION (2, printf ("non-separator, non-brace, non-alpha"));
+               src++;
+            }
+      } /* switch string[src] */
+
+      DBG_ACTION (2, printf ("\n"));
+
+   } /* while string[src] */
+
+   DBG_ACTION (1, printf ("bt_purify_string(): depth on exit: %d\n", depth));
+
+   string[dst] = (char) 0;
+   assert (strlen (string) <= orig_len);
+} /* bt_purify_string() */
+
+
+/* ======================================================================
+ * Case-transformation stuff
+ */
+
+
+/* ------------------------------------------------------------------------
+@NAME       : convert_special_char()
+@INPUT      : transform - 'u' (upper), 'l' (lower) or 't' (title)
+@INOUT      : string         - string being converted in place
+              src            - read index; on entry indexes the opening
+                               brace, on exit indexes the character after
+                               the matching close brace (or the NUL)
+              dst            - write index (always <= *src)
+              start_sentence - cleared when a foreign letter is
+              after_colon      capitalized because of them ('t' only)
+@RETURNS    :
+@DESCRIPTION: Does case conversion on a special character.  Braces are
+              copied, foreign-letter control sequences are replaced by
+              the matching uc_version[]/lc_version[] entry, other control
+              sequences are copied unchanged, and all remaining text is
+              upcased ('u') or downcased ('l' and 't').
+
+              Copies within the string use memmove() because the source
+              and destination ranges can overlap.
+@GLOBALS    : uc_version, lc_version
+@CALLS      : foreign_letter()
+@CALLERS    : bt_change_case()
+@CREATED    : 1997/11/25, GPW
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+static void
+convert_special_char (char transform,
+                      char * string,
+                      int * src,
+                      int * dst,
+                      boolean * start_sentence,
+                      boolean * after_colon)
+{
+   int          depth;
+   boolean      done_special;
+   int          cs_end;
+   int          cs_len;         /* counting the backslash */
+   bt_letter    letter;
+   const char * repl = NULL;
+   int          repl_len;
+
+   /* First, copy just the opening brace */
+   string[(*dst)++] = string[(*src)++];
+
+   /*
+    * Now loop over characters inside the braces -- stop when we reach
+    * the matching close brace, or when the string ends.
+    */
+   depth = 1;                   /* because we're in a special char */
+   done_special = FALSE;
+
+   while (string[*src] != 0 && !done_special)
+   {
+      switch (string[*src])
+      {
+         case '\\':             /* a control sequence */
+         {
+            cs_end = *src+1;    /* scan over chars of c.s. */
+            while (isalpha ((unsigned char) string[cs_end]))
+               cs_end++;
+
+            /*
+             * OK, now *src indexes the backslash (so *src+1 indexes the
+             * first char. of the control sequence), and cs_end indexes
+             * the character immediately following the control sequence.
+             * Thus we analyze [*src+1 .. cs_end-1] to determine if the
+             * control sequence is a foreign letter, and use
+             * (cs_end - *src) as its length including the backslash.
+             */
+
+            cs_len = cs_end - *src;
+
+            /* cs_len == 1 means a lone backslash or a non-alpha control
+             * sequence such as \' -- never a foreign letter */
+            if (cs_len > 1
+                && foreign_letter (string, *src+1, cs_end, &letter))
+            {
+               if (letter == L_OTHER)
+                  internal_error ("impossible foreign letter");
+
+               switch (transform)
+               {
+                  case 'u':
+                     repl = uc_version[(int) letter];
+                     break;
+                  case 'l':
+                     repl = lc_version[(int) letter];
+                     break;
+                  case 't':
+                     if (*start_sentence || *after_colon)
+                     {
+                        repl = uc_version[(int) letter];
+                        *start_sentence = *after_colon = FALSE;
+                     }
+                     else
+                     {
+                        repl = lc_version[(int) letter];
+                     }
+                     break;
+                  default:
+                     internal_error ("impossible case transform \"%c\"",
+                                     transform);
+               }
+
+               repl_len = strlen (repl);
+               if (repl_len > cs_len)
+                  internal_error
+                     ("replacement text longer than original cs");
+
+               /* repl is a static table entry, so it cannot overlap */
+               memcpy (string + *dst, repl, repl_len);
+               *src = cs_end;
+               *dst += repl_len;
+            } /* control sequence is a foreign letter */
+            else
+            {
+               /* not a foreign letter -- just copy the control seq. as
+                * is; source and destination are in the same buffer */
+               memmove (string + *dst, string + *src, cs_len);
+               *src = cs_end;
+               *dst += cs_len;
+            } /* control sequence not a foreign letter */
+
+            break;
+         } /* case: '\\' */
+
+         case '{':
+         {
+            string[(*dst)++] = string[(*src)++];
+            depth++;
+            break;
+         }
+
+         case '}':
+         {
+            string[(*dst)++] = string[(*src)++];
+            depth--;
+            if (depth == 0)
+               done_special = TRUE;
+            break;
+         }
+
+         default:               /* any other character */
+         {
+            switch (transform)
+            {
+               /*
+                * Inside special chars, lowercase and title caps are same.
+                * (At least, that's bibtex's convention.  I might change
+                * this at some point to be a bit smarter.)
+                */
+               case 'l':
+               case 't':
+                  string[(*dst)++] =
+                     tolower ((unsigned char) string[(*src)++]);
+                  break;
+               case 'u':
+                  string[(*dst)++] =
+                     toupper ((unsigned char) string[(*src)++]);
+                  break;
+               default:
+                  internal_error ("impossible case transform \"%c\"",
+                                  transform);
+            }
+            break;
+         } /* default char */
+
+      } /* switch: current char */
+
+   } /* while: string or special char not done */
+
+} /* convert_special_char() */
+
+
+/* ------------------------------------------------------------------------
+@NAME       : bt_change_case()
+@INPUT      : transform - 'u' (uppercase), 'l' (lowercase) or
+                          't' (title capitalization)
+              options   - currently unused
+@INOUT      : string    - NUL-terminated string, converted in place
+@OUTPUT     :
+@RETURNS    :
+@DESCRIPTION: Converts a string (in-place) to either uppercase, lowercase,
+              or "title capitalization".  Text inside ordinary braces is
+              left alone; special characters ("{\...}" at brace depth 0)
+              are handed to convert_special_char().
+
+              Converting a foreign letter can shorten the text (e.g.
+              "\ss" becomes "SS"), so the string is re-terminated at the
+              final write position.
+@GLOBALS    :
+@CALLS      : convert_special_char()
+@CALLERS    :
+@CREATED    : 1997/11/25, GPW
+@MODIFIED   :
+-------------------------------------------------------------------------- */
+void
+bt_change_case (char transform,
+                char * string,
+                ushort options)
+{
+   int     depth;
+   int     src, dst;            /* indices into string */
+   boolean start_sentence;
+   boolean after_colon;
+
+   src = dst = 0;
+   depth = 0;
+
+   start_sentence = TRUE;
+   after_colon = FALSE;
+
+   while (string[src] != 0)
+   {
+      switch (string[src])
+      {
+         case '{':
+
+            /*
+             * At start of special character?  The entire special char.
+             * will be handled here, as follows:
+             *   - text at any brace-depth within the s.c. is case-mangled;
+             *     punctuation (sentence endings, colons) are ignored
+             *   - control sequences are left alone, unless they are
+             *     one of the "foreign letter" control sequences, in
+             *     which case they're converted to the appropriate string
+             *     according to the uc_version or lc_version tables.
+             */
+            if (depth == 0 && string[src+1] == '\\')
+            {
+               convert_special_char (transform, string, &src, &dst,
+                                     &start_sentence, &after_colon);
+            }
+
+            /*
+             * Otherwise, it's just something in braces.  This is probably
+             * a proper noun or something encased in braces to protect it
+             * from case-mangling, so we do not case-mangle it.  However,
+             * we *do* switch out of start_sentence or after_colon mode if
+             * we happen to be there (otherwise we'll do the wrong thing
+             * once we're out of the braces).
+             */
+            else
+            {
+               string[dst++] = string[src++];
+               start_sentence = after_colon = FALSE;
+               depth++;
+            }
+            break;
+
+         case '}':
+            string[dst++] = string[src++];
+            depth--;
+            break;
+
+         /*
+          * Sentence-ending punctuation and colons are handled separately
+          * to allow for exact mimicking of BibTeX's behaviour.  I happen
+          * to think that this behaviour (capitalize first word of
+          * sentences in a title) is better than BibTeX's, but I want to
+          * keep my options open for a future goal of perfect
+          * compatibility.
+          */
+         case '.':
+         case '?':
+         case '!':
+            start_sentence = TRUE;
+            string[dst++] = string[src++];
+            break;
+
+         case ':':
+            after_colon = TRUE;
+            string[dst++] = string[src++];
+            break;
+
+         default:
+            if (isspace ((unsigned char) string[src]))
+            {
+               string[dst++] = string[src++];
+            }
+            else if (depth != 0)        /* brace-protected: copy as is */
+            {
+               string[dst++] = string[src++];
+            }
+            else
+            {
+               switch (transform)
+               {
+                  case 'u':
+                     string[dst++] =
+                        toupper ((unsigned char) string[src++]);
+                     break;
+                  case 'l':
+                     string[dst++] =
+                        tolower ((unsigned char) string[src++]);
+                     break;
+                  case 't':
+                     if (start_sentence || after_colon)
+                     {
+                        /*
+                         * XXX BibTeX only preserves case of character
+                         * immediately after a colon; I do two things
+                         * differently: first, I pay attention to sentence
+                         * punctuation, and second I force uppercase
+                         * at start of sentence or after a colon.
+                         */
+                        string[dst++] =
+                           toupper ((unsigned char) string[src++]);
+                        start_sentence = after_colon = FALSE;
+                     }
+                     else
+                     {
+                        string[dst++] =
+                           tolower ((unsigned char) string[src++]);
+                     }
+                     break;
+                  default:
+                     internal_error ("impossible case transform \"%c\"",
+                                     transform);
+               }
+            } /* not blank, depth == 0 */
+      } /* switch on current character */
+
+   } /* while not at end of string */
+
+   /* dst lags src whenever a foreign letter shrank; drop the leftovers */
+   string[dst] = (char) 0;
+
+} /* bt_change_case */
diff --git a/src/translators/btparse/sym.c b/src/translators/btparse/sym.c
new file mode 100644
index 0000000..2426dea
--- /dev/null
+++ b/src/translators/btparse/sym.c
@@ -0,0 +1,372 @@
+/*
+ * Simple symbol table manager using coalesced chaining to resolve collisions
+ *
+ * Doubly-linked lists are used for fast removal of entries.
+ *
+ * 'sym.h' must have a definition for typedef "Sym". Sym must include at
+ * minimum the following fields:
+ *
+ * ...
+ * char *symbol;
+ * struct ... *next, *prev, **head, *scope;
+ * unsigned int hash;
+ * ...
+ *
+ * 'template.h' can be used as a template to create a 'sym.h'.
+ *
+ * 'head' is &(table[hash(itself)]).
+ * The hash table is not resizable at run-time.
+ * The scope field is used to link all symbols of a current scope together.
+ * Scope() sets the current scope (linked list) to add symbols to.
+ * Any number of scopes can be handled. The user passes the address of
+ * a pointer to a symbol table
+ * entry (INITIALIZED TO NULL first time).
+ *
+ * Available Functions:
+ *
+ * zzs_init(s1,s2) -- Create hash table with size s1, string table size s2.
+ * zzs_done() -- Free hash and string table created with zzs_init().
+ * zzs_add(key,rec)-- Add 'rec' with key 'key' to the symbol table.
+ * zzs_newadd(key) -- create entry; add using 'key' to the symbol table.
+ * zzs_get(key) -- Return pointer to last record entered under 'key'
+ * Else return NULL
+ * zzs_del(p) -- Unlink the entry associated with p. This does
+ * NOT free 'p' and DOES NOT remove it from a scope
+ * list. If it was a part of your intermediate code
+ * tree or another structure. It will still be there.
+ * It is only removed from further consideration
+ * by the symbol table.
+ * zzs_keydel(s) -- Unlink the entry associated with key s.
+ * Calls zzs_del(p) to unlink.
+ * zzs_scope(sc) -- Specifies that everything added to the symbol
+ * table with zzs_add() is added to the list (scope)
+ * 'sc'. 'sc' is of 'Sym **sc' type and must be
+ * initialized to NULL before trying to add anything
+ * to it (passing it to zzs_scope()). Scopes can be
+ * switched at any time and merely links a set of
+ * symbol table entries. If a NULL pointer is
+ * passed, the current scope is returned.
+ * zzs_rmscope(sc) -- Remove (zzs_del()) all elements of scope 'sc'
+ * from the symbol table. The entries are NOT
+ * free()'d. A pointer to the first
+ * element in the "scope" is returned. The user
+ * can then manipulate the list as he/she chooses
+ * (such as freeing them all). NOTE that this
+ * function sets your scope pointer to NULL,
+ * but returns a pointer to the list for you to use.
+ * zzs_stat() -- Print out the symbol table and some relevant stats.
+ * zzs_new(key) -- Create a new record with calloc() of type Sym.
+ * Add 'key' to the string table and make the new
+ * records 'symbol' pointer point to it.
+ * zzs_strdup(s) -- Add s to the string table and return a pointer
+ * to it. Very fast allocation routine
+ * and does not require strlen() nor calloc().
+ *
+ * Example:
+ *
+ * #include <stdio.h>
+ * #include "sym.h"
+ *
+ * main()
+ * {
+ * Sym *scope1=NULL, *scope2=NULL, *a, *p;
+ *
+ * zzs_init(101, 100);
+ *
+ * a = zzs_new("Apple"); zzs_add(a->symbol, a); -- No scope
+ * zzs_scope( &scope1 ); -- enter scope 1
+ * a = zzs_new("Plum"); zzs_add(a->symbol, a);
+ * zzs_scope( &scope2 ); -- enter scope 2
+ * a = zzs_new("Truck"); zzs_add(a->symbol, a);
+ *
+ * p = zzs_get("Plum");
+ * if ( p == NULL ) fprintf(stderr, "Hmmm...Can't find 'Plum'\n");
+ *
+ * p = zzs_rmscope(&scope1);
+ * for (; p!=NULL; p=p->scope) {printf("Scope1: %s\n", p->symbol);}
+ * p = zzs_rmscope(&scope2);
+ * for (; p!=NULL; p=p->scope) {printf("Scope2: %s\n", p->symbol);}
+ * }
+ *
+ * Terence Parr
+ * Purdue University
+ * February 1990
+ *
+ * CHANGES
+ *
+ * Terence Parr
+ * May 1991
+ * Renamed functions to be consistent with ANTLR
+ * Made HASH macro
+ * Added zzs_keydel()
+ * Added zzs_newadd()
+ * Fixed up zzs_stat()
+ *
+ * July 1991
+ * Made symbol table entry save its hash code for fast comparison
+ * during searching etc...
+ */
+
+/*#include "bt_config.h"*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#ifdef MEMCHK
+#include "trax.h"
+#endif
+#include "sym.h"
+/*#include "my_dmalloc.h"*/
+
+#define StrSame 0 /* result of strcasecmp() when the strings match */
+
+static Sym **CurScope = NULL; /* scope list that zzs_add() links records onto; NULL = none */
+static unsigned size = 0; /* number of buckets in table[], set by zzs_init() */
+static Sym **table=NULL; /* hash table: array of bucket-chain heads */
+static char *strings; /* string table allocated by zzs_init() */
+static char *strp; /* next free position in strings[], advanced by zzs_strdup() */
+static int strsize = 0; /* size of strings[] in bytes, set by zzs_init() */
+
+/*
+ * Allocate the hash table (sz buckets) and the string table (strs bytes),
+ * both zero-filled.  Does nothing when either size is not positive; prints
+ * a message and exits with status 1 when an allocation fails.
+ */
+void
+zzs_init(int sz, int strs)
+{
+    if ( sz > 0 && strs > 0 )
+    {
+        table = (Sym **) calloc(sz, sizeof(Sym *));
+        if ( !table )
+        {
+            fprintf(stderr, "Cannot allocate table of size %d\n", sz);
+            exit(1);
+        }
+
+        strings = (char *) calloc(strs, sizeof(char));
+        if ( !strings )
+        {
+            fprintf(stderr, "Cannot allocate string table of size %d\n", strs);
+            exit(1);
+        }
+
+        /* publish the sizes only once both tables exist */
+        strp = strings;
+        strsize = strs;
+        size = sz;
+    }
+}
+
+
+/*
+ * Free every Sym record still linked into the hash table.  Each bucket
+ * head is reset to NULL before its chain is freed, so the table is left
+ * empty rather than pointing at freed records.  Only the records are
+ * freed; the hash table and string table themselves are released by
+ * zzs_done().  Safe to call when no table has been allocated.
+ */
+void
+zzs_free(void)
+{
+    unsigned i;
+    Sym *cur, *next;
+
+    if ( table == NULL ) return;
+
+    for (i = 0; i < size; i++)
+    {
+        cur = table[i];
+        table[i] = NULL;        /* bucket is empty from now on */
+        while (cur != NULL)
+        {
+            next = cur->next;   /* grab the link before the record goes */
+            free (cur);
+            cur = next;
+        }
+    }
+}
+
+
+/*
+ * Release the hash table and string table created by zzs_init() and reset
+ * the module state, so that calling zzs_done() or zzs_free() again is
+ * harmless.  Records still linked in the table are NOT freed here (see
+ * zzs_free()).  zzs_init() must be called again before the table is used.
+ */
+void
+zzs_done(void)
+{
+    free( table );              /* free(NULL) is a no-op */
+    free( strings );
+
+    table = NULL;
+    size = 0;
+    strings = NULL;
+    strp = NULL;
+    strsize = 0;
+}
+
+/*
+ * Insert 'rec' under 'key': the record's hash is computed with HASH_FUN
+ * and stored in rec->hash, the record is pushed onto the current scope
+ * list (if one is set) and then onto the front of its bucket chain.
+ */
+void
+zzs_add(char *key, register Sym *rec)
+{
+    unsigned int code = 0;
+    char *cursor = key;
+    Sym **slot;
+
+    HASH_FUN(cursor, code);
+    rec->hash = code;           /* full hash, kept for quick rejection */
+    slot = &(table[code % size]);
+
+    if ( CurScope != NULL )
+    {
+        rec->scope = *CurScope;
+        *CurScope = rec;
+    }
+
+    /* push onto the front of the doubly-linked bucket chain */
+    rec->next = *slot;
+    rec->prev = NULL;
+    if ( *slot != NULL ) (*slot)->prev = rec;
+    *slot = rec;
+    rec->head = slot;
+}
+
+/*
+ * Return the first record in key's bucket chain whose stored hash equals
+ * key's hash and whose symbol matches key ignoring case, or NULL if there
+ * is none.  Since zzs_add() inserts at the front of the chain, this is the
+ * most recently added match.
+ */
+Sym *
+zzs_get(char *key)
+{
+    unsigned int code = 0;
+    char *cursor = key;
+    Sym *rec;
+
+    HASH_FUN(cursor, code);
+
+    rec = table[code % size];
+    while ( rec != NULL )
+    {
+        /* compare hashes first; only then pay for the string compare */
+        if ( rec->hash == code && strcasecmp(key, rec->symbol) == StrSame )
+            return( rec );
+        rec = rec->next;
+    }
+    return( NULL );
+}
+
+/*
+ * Unlink p from its bucket chain; p itself is not freed and its scope
+ * link is left untouched.
+ *
+ * p is expected to be in the symbol table: if it is not part of a bucket
+ * chain, bad things will happen.  The one exception is a record whose
+ * prev and head pointers are both NULL, which is left alone.
+ *
+ * Passing NULL prints a message and exits with status 1.
+ */
+void
+zzs_del(register Sym *p)
+{
+    if ( p == NULL )
+    {
+        fprintf(stderr, "zzs_del(NULL)\n");
+        exit(1);
+    }
+
+    if ( p->prev != NULL )      /* somewhere after the head */
+    {
+        p->prev->next = p->next;
+        if ( p->next != NULL ) p->next->prev = p->prev;
+    }
+    else                        /* head of its chain, or not linked */
+    {
+        Sym **bucket = p->head;
+
+        if ( bucket == NULL ) return;   /* not part of symbol table */
+        *bucket = p->next;
+        if ( *bucket != NULL ) (*bucket)->prev = NULL;
+    }
+
+    /* not part of symbol table anymore */
+    p->next = NULL;
+    p->prev = NULL;
+    p->head = NULL;
+}
+
+/* Unlink the record found under 'key', if there is one (see zzs_del()). */
+void
+zzs_keydel(char *key)
+{
+    Sym *found = zzs_get(key);
+
+    if ( found == NULL ) return;
+    zzs_del( found );
+}
+
+/* S c o p e S t u f f */
+
+/*
+ * With a non-NULL 'scope', make it the current scope and return it.
+ * With NULL, leave the current scope alone and return it.
+ */
+Sym **
+zzs_scope(Sym **scope)
+{
+    if ( scope != NULL ) CurScope = scope;
+    return( CurScope );
+}
+
+/*
+ * Unlink every record on the list *scope from the symbol table, set
+ * *scope to NULL, and return the former first element.  The records are
+ * not freed and stay chained through their scope pointers.  Returns NULL
+ * when 'scope' itself is NULL.
+ */
+Sym *
+zzs_rmscope(register Sym **scope)
+{
+    Sym *first;
+    Sym *rec;
+
+    if ( scope == NULL ) return(NULL);
+
+    first = *scope;
+    rec = first;
+    while ( rec != NULL )
+    {
+        zzs_del( rec );
+        rec = rec->scope;
+    }
+    *scope = NULL;
+    return( first );
+}
+
+/*
+ * Print the contents of the symbol table followed by some statistics:
+ * record count, buckets in use, a histogram of bucket-chain lengths
+ * (chains of 20 or more are reported but not counted) and the lowest and
+ * highest bucket in use.
+ *
+ * All integer values are converted to int before being handed to the %d
+ * conversions; pointer differences and unsigned values must not be passed
+ * to %d directly.
+ */
+void
+zzs_stat(void)
+{
+    static unsigned short count[20];
+    unsigned int i,n=0,low=0, hi=0;
+    register Sym **p;
+    float avg=0.0;
+
+    for (i=0; i<20; i++) count[i] = 0;
+    for (p=table; p<&(table[size]); p++)
+    {
+        register Sym *q = *p;
+        unsigned int len;
+        unsigned int bucket = (unsigned int) (p-table);
+
+        /* remember the first bucket in use */
+        if ( q != NULL && low==0 ) low = bucket;
+        len = 0;
+        if ( q != NULL ) printf("[%d]", (int) bucket);
+        while ( q != NULL )
+        {
+            len++;
+            n++;
+            printf(" %s", q->symbol);
+            q = q->next;
+            if ( q == NULL ) printf("\n");
+        }
+        if ( len>=20 ) printf("zzs_stat: count table too small\n");
+        else count[len]++;
+        if ( *p != NULL ) hi = bucket;
+    }
+
+    printf("Storing %d recs used %d hash positions out of %d\n",
+           (int) n, (int) (size-count[0]), (int) size);
+    /* NOTE(review): this prints a fraction although the label says %. */
+    printf("%f %% utilization\n",
+           ((float)(size-count[0]))/((float)size));
+    for (i=0; i<20; i++)
+    {
+        if ( count[i] != 0 )
+        {
+            avg += (((float)(i*count[i]))/((float)n)) * i;
+            printf("Buckets of len %d == %d (%f %% of recs)\n",
+                   (int) i, (int) count[i],
+                   100.0*((float)(i*count[i]))/((float)n));
+        }
+    }
+    printf("Avg bucket length %f\n", avg);
+    printf("Range of hash function: %d..%d\n", (int) low, (int) hi);
+}
+
+/*
+ * Allocate a zero-filled symbol table record and point its "symbol" field
+ * at a copy of 'text' stored in the string table.  The record is not added
+ * to the hash table.  Prints a message and exits with status 1 when the
+ * allocation fails.
+ */
+Sym *
+zzs_new(char *text)
+{
+    char *zzs_strdup(register char *s);
+    Sym *rec = (Sym *) calloc(1,sizeof(Sym));
+
+    if ( rec == 0 )
+    {
+        fprintf(stderr,"Out of memory\n");
+        exit(1);
+    }
+
+    rec->symbol = zzs_strdup(text);
+    return rec;
+}
+
+/*
+ * Create a record for 'text' with zzs_new() and, when that yields a
+ * record, add it to the symbol table under the same key.  Returns the
+ * record.
+ */
+Sym *
+zzs_newadd(char *text)
+{
+    Sym *rec = zzs_new(text);
+
+    if ( rec == NULL ) return rec;
+    zzs_add(text, rec);
+    return rec;
+}
+
+/* Add a string to the string table and return a pointer to it.
+ * Bump the pointer into the string table to next avail position.
+ *
+ * Prints a message and exits with status -1 when the string does not fit.
+ * That includes the terminating NUL (so a run of empty strings cannot
+ * write past the end of the table) and the case where no string table has
+ * been allocated yet.
+ */
+char *
+zzs_strdup(register char *s)
+{
+    register char *start=strp;
+
+    if ( strings == NULL || strp == NULL )
+    {
+        fprintf(stderr, "sym: string table overflow (%d chars)\n", strsize);
+        exit(-1);
+    }
+
+    while ( *s != '\0' )
+    {
+        /* same limit as always: characters stop two short of the end */
+        if ( (strp - strings) >= (strsize - 2) )
+        {
+            fprintf(stderr, "sym: string table overflow (%d chars)\n", strsize);
+            exit(-1);
+        }
+        *strp++ = *s++;
+    }
+
+    /* the terminator needs a slot inside the table as well */
+    if ( (strp - strings) >= strsize )
+    {
+        fprintf(stderr, "sym: string table overflow (%d chars)\n", strsize);
+        exit(-1);
+    }
+    *strp++ = '\0';
+
+    return( start );
+}
diff --git a/src/translators/btparse/sym.h b/src/translators/btparse/sym.h
new file mode 100644
index 0000000..78983d1
--- /dev/null
+++ b/src/translators/btparse/sym.h
@@ -0,0 +1,33 @@
+#ifndef SYM_H
+#define SYM_H
+
+#include <ctype.h>
+
+/*
+ * Declarations for symbol table in sym.c
+ */
+
+/*
+ * define some hash function
+ *
+ * HASH_FUN(p, h) folds the NUL-terminated string at p into h, ignoring
+ * case; p is advanced to the terminator.  Both arguments must be plain
+ * variable names, and h should start at 0.  Each character is converted
+ * to unsigned char before being given to tolower(), as <ctype.h> requires.
+ */
+#ifndef HASH_FUN
+#define HASH_FUN(p, h) while ( *p != '\0' ) h = (h<<1) + tolower ((unsigned char) *p++);
+#endif
+
+/* minimum symbol table record */
+typedef struct _sym
+{
+    char *symbol;               /* the macro name */
+    char *text;                 /* its expansion */
+    struct _sym *next, *prev;   /* neighbours in the bucket chain */
+    struct _sym **head;         /* the bucket this record is linked in */
+    struct _sym *scope;         /* next record on the same scope list */
+    unsigned int hash;          /* full hash of the key, set by zzs_add() */
+} Sym, *SymPtr;
+
+void zzs_init(int, int);
+void zzs_free(void);
+void zzs_done(void);
+void zzs_add(char *, Sym *);
+Sym *zzs_get(char *);
+void zzs_del(Sym *);
+void zzs_keydel(char *);
+Sym **zzs_scope(Sym **);
+Sym *zzs_rmscope(Sym **);
+void zzs_stat(void);
+Sym *zzs_new(char *);
+Sym *zzs_newadd(char *);
+char *zzs_strdup(char *);
+
+#endif /* SYM_H */
diff --git a/src/translators/btparse/tex_tree.c b/src/translators/btparse/tex_tree.c
new file mode 100644
index 0000000..0d7d33d
--- /dev/null
+++ b/src/translators/btparse/tex_tree.c
@@ -0,0 +1,414 @@
+/* ------------------------------------------------------------------------
+@NAME : tex_tree.c
+@DESCRIPTION: Functions for dealing with strings of TeX code: converting
+ them to tree representation, traversing the trees to glean
+ useful information, and converting back to string form.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/05/29, Greg Ward
+@MODIFIED :
+@VERSION : $Id: tex_tree.c,v 1.4 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "error.h"
+#include "btparse.h"
+/*#include "my_dmalloc.h"*/
+
+/* blech! temp hack until I make error.c perfect and magical */
+#define string_warning(w) fprintf (stderr, w);
+
+/* One entry of the stack of tree nodes kept while a string is being
+ * parsed.  Entries are doubly linked; the caller holds a pointer to the
+ * top entry (NULL when the stack is empty).
+ */
+typedef struct treestack_s
+{
+ bt_tex_tree * node; /* the tree node saved in this entry */
+ struct treestack_s
+ * prev, /* entry pushed before this one (NULL at the bottom) */
+ * next; /* entry pushed after this one (NULL for the top entry) */
+} treestack;
+
+
+/* ----------------------------------------------------------------------
+ * Stack manipulation functions
+ */
+
+/* ------------------------------------------------------------------------
+@NAME : push_treestack()
+@INPUT : *stack
+ node
+@OUTPUT : *stack
+@RETURNS :
+@DESCRIPTION: Creates and initializes new node in a stack, and pushes it
+ onto the stack.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+push_treestack (treestack **stack, bt_tex_tree *node)
+{
+   treestack *newtop;
+
+   newtop = (treestack *) malloc (sizeof (treestack));
+   if (newtop == NULL)
+   {
+      /* the NULL result used to be dereferenced on the very next line */
+      internal_error ("out of memory");
+      /* NOTE(review): internal_error() is expected not to return --
+       * confirm in error.c.  This return is only a backstop; if it is
+       * ever reached the stack is left untouched. */
+      return;
+   }
+
+   newtop->node = node;
+   newtop->next = NULL;
+   newtop->prev = *stack;
+
+   if (*stack != NULL)                  /* stack already has some entries */
+      (*stack)->next = newtop;
+
+   *stack = newtop;                     /* new entry is the top either way */
+
+} /* push_treestack() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : pop_treestack
+@INPUT : *stack
+@OUTPUT : *stack
+@RETURNS :
+@DESCRIPTION: Pops an entry off of a stack of tex_tree nodes, frees up
+ the wrapper treestack node, and returns the popped tree node.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static bt_tex_tree *
+pop_treestack (treestack **stack)
+{
+   treestack *   top;
+   treestack *   below;
+   bt_tex_tree * popped;
+
+   if (*stack == NULL)
+      internal_error ("attempt to pop off empty stack");
+
+   /* remember what the top entry holds, then release the entry itself */
+   top = *stack;
+   below = top->prev;
+   popped = top->node;
+   free (top);
+
+   /* the entry underneath (if any) becomes the new top */
+   if (below != NULL)
+      below->next = NULL;
+   *stack = below;
+
+   return popped;
+
+} /* pop_treestack() */
+
+
+/* ----------------------------------------------------------------------
+ * Tree creation/destruction functions
+ */
+
+/* ------------------------------------------------------------------------
+@NAME : new_tex_tree
+@INPUT : start
+@OUTPUT :
+@RETURNS : pointer to newly-allocated node
+@DESCRIPTION: Allocates and initializes a bt_tex_tree node.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static bt_tex_tree *
+new_tex_tree (char *start)
+{
+   bt_tex_tree * node;
+
+   node = (bt_tex_tree *) malloc (sizeof (bt_tex_tree));
+   if (node == NULL)
+   {
+      /* the NULL result used to be written through immediately */
+      internal_error ("out of memory");
+      /* NOTE(review): internal_error() is expected not to return --
+       * confirm in error.c.  The NULL return is only a backstop. */
+      return NULL;
+   }
+
+   /* a fresh node covers zero characters beginning at `start' and has
+    * neither a child nor a sibling yet */
+   node->start = start;
+   node->len = 0;
+   node->child = NULL;
+   node->next = NULL;
+   return node;
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_build_tex_tree
+@INPUT : string
+@OUTPUT :
+@RETURNS : pointer to a complete tree; call bt_free_tex_tree() to free
+ the entire tree
+@DESCRIPTION: Traverses a string looking for TeX groups ({...}), and builds
+ a tree containing pointers into the string and describing
+ its brace-structure.
+@GLOBALS :
+@CALLS :
+@CALLERS :
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+bt_tex_tree *
+bt_build_tex_tree (char * string)
+{
+   int   i;
+   int   depth;                         /* number of groups currently open; */
+                                        /* matches the number of stack entries */
+   int   len;
+   bt_tex_tree
+       * top,
+       * cur,
+       * new;
+   treestack
+       * stack;
+
+   i = 0;
+   depth = 0;
+   len = (int) strlen (string);
+   stack = NULL;
+   top = new_tex_tree (string);
+   if (top == NULL)                     /* allocation failed: nothing to free */
+      return NULL;
+
+   cur = top;
+
+   while (i < len)
+   {
+      switch (string[i])
+      {
+         case '{':                      /* go one level deeper */
+         {
+            if (i == len-1)             /* open brace in last character? */
+            {
+               string_warning ("unbalanced braces: { at end of string");
+               goto error;
+            }
+
+            /* the group's text starts just after the brace; the node we
+             * were filling is saved so that the matching '}' can return
+             * to it */
+            new = new_tex_tree (string+i+1);
+            if (new == NULL)
+               goto error;
+            cur->child = new;
+            push_treestack (&stack, cur);
+            cur = new;
+            depth++;
+            break;
+         }
+         case '}':                      /* pop level(s) off */
+         {
+            /* a run of consecutive '}' is consumed in one go */
+            while (i < len && string[i] == '}')
+            {
+               if (stack == NULL)
+               {
+                  string_warning ("unbalanced braces: extra }");
+                  goto error;
+               }
+               cur = pop_treestack (&stack);
+               depth--;
+               i++;
+            }
+            i--;                        /* back onto the last '}' of the run */
+
+            if (i == len-1)             /* reached end of string? */
+            {
+               if (depth > 0)           /* but not at depth 0 */
+               {
+                  string_warning ("unbalanced braces: not enough }'s");
+                  goto error;
+               }
+
+               /*
+                * if we get here, do nothing -- we've reached the end of
+                * the string and are at depth 0, so will just fall out
+                * of the while loop at the end of this iteration
+                */
+            }
+            else                        /* still have characters left */
+            {                           /* to worry about */
+               /* text following the group goes into a sibling of the
+                * node we just returned to */
+               new = new_tex_tree (string+i+1);
+               if (new == NULL)
+                  goto error;
+               cur->next = new;
+               cur = new;
+            }
+
+            break;
+         }
+         default:
+         {
+            cur->len++;                 /* ordinary character: extend node */
+         }
+
+      } /* switch */
+
+      i++;
+
+   } /* while i */
+
+   if (depth > 0)
+   {
+      string_warning ("unbalanced braces (not enough }'s)");
+      goto error;
+   }
+
+   return top;
+
+error:
+   /* The stack entries are only wrappers around nodes that are still
+    * linked into the tree, so release the wrappers first (they used to
+    * be leaked whenever an error occurred inside an open group) and
+    * then the tree itself. */
+   while (stack != NULL)
+      (void) pop_treestack (&stack);
+   bt_free_tex_tree (&top);
+   return NULL;
+
+} /* bt_build_tex_tree() */
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_free_tex_tree
+@INPUT : *top
+@OUTPUT : *top (set to NULL after it's free()'d)
+@RETURNS :
+@DESCRIPTION: Frees up an entire tree created by bt_build_tex_tree().
+@GLOBALS :
+@CALLS : itself, free()
+@CALLERS :
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_free_tex_tree (bt_tex_tree **top)
+{
+   bt_tex_tree * node;
+   bt_tex_tree * next;
+
+   /* bt_build_tex_tree() returns NULL on error, so tolerate being
+    * handed an empty tree instead of dereferencing it */
+   if (top == NULL || *top == NULL)
+      return;
+
+   /* siblings are walked in a loop and only children are handled by
+    * recursion, so the recursion depth follows the brace nesting rather
+    * than the length of the sibling chain */
+   for (node = *top; node != NULL; node = next)
+   {
+      next = node->next;                /* fetch before the node is freed */
+      if (node->child) bt_free_tex_tree (&node->child);
+      free (node);
+   }
+
+   *top = NULL;
+}
+
+
+
+/* ----------------------------------------------------------------------
+ * Tree traversal functions
+ */
+
+/* ------------------------------------------------------------------------
+@NAME : bt_dump_tex_tree
+@INPUT : node
+ depth
+ stream
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Dumps a TeX tree: one node per line, depth indented according
+ to depth.
+@GLOBALS :
+@CALLS : itself
+@CALLERS :
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+void
+bt_dump_tex_tree (bt_tex_tree *node, int depth, FILE *stream)
+{
+   /* Each node is printed on its own line as "[text]", indented by two
+    * columns per level; its children follow one level deeper, then its
+    * siblings at the same level.
+    *
+    * The node's text is printed straight from the original string using
+    * a precision ("%.*s"), so no scratch buffer is needed.  This removes
+    * the former 255-character limit and the buffer overrun that would
+    * have followed if the "buf too small" internal error ever returned.
+    */
+   for ( ; node != NULL; node = node->next)
+   {
+      fprintf (stream, "%*s[%.*s]\n",
+               depth*2, "", (int) node->len, node->start);
+
+      bt_dump_tex_tree (node->child, depth+1, stream);
+   }
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : count_length
+@INPUT : node
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Counts the total number of characters that will be needed
+ to print a string reconstructed from a TeX tree. (Length
+ of string in each node, plus two [{ and }] for each down
+ edge.)
+@GLOBALS :
+@CALLS : itself
+@CALLERS : bt_flatten_tex_tree
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static int
+count_length (bt_tex_tree *node)
+{
+   int total;
+
+   if (node == NULL) return 0;
+
+   total = node->len;                   /* this node's own text */
+   if (node->child != NULL)             /* a group costs '{' + '}' + contents */
+      total += 2 + count_length (node->child);
+   total += count_length (node->next);  /* whatever follows at this level */
+
+   return total;
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : flatten_tree
+@INPUT : node
+ *offset
+@OUTPUT : *buf
+ *offset
+@RETURNS :
+@DESCRIPTION: Dumps a reconstructed string ("flat" representation of the
+ tree) into a pre-allocated buffer, starting at a specified
+ offset.
+@GLOBALS :
+@CALLS : itself
+@CALLERS : bt_flatten_tex_tree
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+static void
+flatten_tree (bt_tex_tree *node, char *buf, int *offset)
+{
+   char * dest = buf + *offset;
+
+   /* this node's own text goes first */
+   strncpy (dest, node->start, node->len);
+   *offset += node->len;
+
+   /* then its group, wrapped in the braces that the tree left out */
+   if (node->child != NULL)
+   {
+      buf[*offset] = '{';
+      (*offset)++;
+      flatten_tree (node->child, buf, offset);
+      buf[*offset] = '}';
+      (*offset)++;
+   }
+
+   /* and finally whatever follows at the same level */
+   if (node->next != NULL)
+      flatten_tree (node->next, buf, offset);
+}
+
+
+/* ------------------------------------------------------------------------
+@NAME : bt_flatten_tex_tree
+@INPUT : top
+@OUTPUT :
+@RETURNS : flattened string representation of the tree (as a string
+ allocated with malloc(), so you should free() it when
+ you're done with it)
+@DESCRIPTION: Counts the number of characters needed for a "flat"
+ string representation of a tree, allocates a string of
+ that size, and generates the string.
+@GLOBALS :
+@CALLS : count_length, flatten_tree
+@CALLERS :
+@CREATED : 1997/05/29, GPW
+@MODIFIED :
+-------------------------------------------------------------------------- */
+char *
+bt_flatten_tex_tree (bt_tex_tree *top)
+{
+   int    len;
+   int    offset;
+   char * buf;
+
+   len = count_length (top);            /* 0 for a NULL tree */
+   buf = (char *) malloc (sizeof (char) * (len+1));
+   if (buf == NULL)                     /* out of memory: nothing to return */
+      return NULL;
+
+   offset = 0;
+   if (top != NULL)                     /* flatten_tree() dereferences its */
+      flatten_tree (top, buf, &offset); /* node without checking it */
+
+   /* flatten_tree() writes only the characters themselves; the extra
+    * byte allocated above is for the terminator, which was previously
+    * never stored, leaving the result an unterminated string */
+   buf[offset] = '\0';
+   return buf;
+}
diff --git a/src/translators/btparse/tokens.h b/src/translators/btparse/tokens.h
new file mode 100644
index 0000000..6f9405a
--- /dev/null
+++ b/src/translators/btparse/tokens.h
@@ -0,0 +1,41 @@
+#ifndef tokens_h
+#define tokens_h
+/* tokens.h -- List of labelled tokens and stuff
+ *
+ * Generated from: bibtex.g
+ *
+ * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994
+ * Purdue University Electrical Engineering
+ * ANTLR Version 1.33
+ */
+/* Numeric codes of the labelled tokens.  This file is generated (see the
+ * banner above), so the values should be changed by regenerating it from
+ * bibtex.g, not by editing them here.
+ */
+#define zzEOF_TOKEN 1
+#define AT 2
+#define COMMENT 4
+#define NUMBER 9
+#define NAME 10
+#define LBRACE 11
+#define RBRACE 12
+#define ENTRY_OPEN 13
+#define ENTRY_CLOSE 14
+#define EQUALS 15
+#define HASH 16
+#define COMMA 17
+#define STRING 25
+
+/* Parser functions -- presumably one per rule of bibtex.g (confirm against
+ * the grammar).  Each takes the address of an AST pointer; body() and
+ * contents() additionally take the metatype of the entry being parsed.
+ * AST and bt_metatype must already be declared when this header is
+ * included.
+ */
+void bibfile(AST**_root);
+void entry(AST**_root);
+void body(AST**_root, bt_metatype metatype );
+void contents(AST**_root, bt_metatype metatype );
+void fields(AST**_root);
+void field(AST**_root);
+void value(AST**_root);
+void simple_value(AST**_root);
+
+#endif
+/* NOTE: the declarations below come after the include guard's #endif, so
+ * they are repeated on every inclusion of this header (harmless for extern
+ * declarations).  SetWordType must already be declared at that point.
+ * NOTE(review): these look like the generated parser's error and lookahead
+ * set tables -- confirm in the generated err.c.
+ */
+extern SetWordType zzerr1[];
+extern SetWordType zzerr2[];
+extern SetWordType zzerr3[];
+extern SetWordType zzerr4[];
+extern SetWordType setwd1[];
+extern SetWordType zzerr5[];
+extern SetWordType setwd2[];
diff --git a/src/translators/btparse/traversal.c b/src/translators/btparse/traversal.c
new file mode 100644
index 0000000..c7e10a2
--- /dev/null
+++ b/src/translators/btparse/traversal.c
@@ -0,0 +1,187 @@
+/* ------------------------------------------------------------------------
+@NAME : traversal.c
+@DESCRIPTION: Routines for traversing the AST for a single entry.
+@GLOBALS :
+@CALLS :
+@CREATED : 1997/01/21, Greg Ward
+@MODIFIED :
+@VERSION : $Id: traversal.c,v 1.17 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include "btparse.h"
+#include "parse_auxiliary.h"
+#include "prototypes.h"
+/*#include "my_dmalloc.h"*/
+
+
+/* Steps through a list of entries: returns the head of entry_list when
+ * prev_entry is NULL, otherwise the node to the right of prev_entry.
+ * Returns NULL if entry_list is NULL or does not start with an entry
+ * node, or if prev_entry is not an entry node.
+ */
+AST *bt_next_entry (AST *entry_list, AST *prev_entry)
+{
+   /* the list itself must exist and must start with an entry node */
+   if (entry_list == NULL || entry_list->nodetype != BTAST_ENTRY)
+      return NULL;
+
+   /* no previous entry: the walk starts at the head of the list */
+   if (!prev_entry)
+      return entry_list;
+
+   return (prev_entry->nodetype == BTAST_ENTRY) ? prev_entry->right : NULL;
+}
+
+
+/* Returns the metatype stored in an entry node, or BTE_UNKNOWN when
+ * entry is NULL or is not an entry node.
+ */
+bt_metatype bt_entry_metatype (AST *entry)
+{
+   if (entry != NULL && entry->nodetype == BTAST_ENTRY)
+      return entry->metatype;
+
+   return BTE_UNKNOWN;
+}
+
+
+/* Returns the text stored in an entry node (the string is owned by the
+ * node, not copied), or NULL when entry is NULL or is not an entry node.
+ */
+char *bt_entry_type (AST *entry)
+{
+   if (entry != NULL && entry->nodetype == BTAST_ENTRY)
+      return entry->text;
+
+   return NULL;
+}
+
+
+/* Returns the text of an entry's key node (the string is owned by the
+ * node, not copied).  Returns NULL when entry is NULL, when it is not a
+ * regular entry, or when its first child is not a key node.
+ */
+char *bt_entry_key (AST *entry)
+{
+   /* same NULL tolerance as bt_entry_type() and bt_entry_metatype();
+    * a NULL entry used to be dereferenced here */
+   if (!entry) return NULL;
+
+   if (entry->metatype == BTE_REGULAR &&
+       entry->down && entry->down->nodetype == BTAST_KEY)
+   {
+      return entry->down->text;
+   }
+   else
+   {
+      return NULL;
+   }
+}
+
+
+/* Steps through the fields of a regular or macro-definition entry.
+ * With prev == NULL the first field is returned (skipping the key node
+ * of a regular entry, if there is one); otherwise the node to the right
+ * of prev.  Returns NULL when there is no such field, when entry is
+ * NULL or has no children, or when the entry has some other metatype.
+ *
+ * name is optional: if it is not NULL, *name receives the text of the
+ * returned field (owned by the node), or NULL when NULL is returned.
+ */
+AST *bt_next_field (AST *entry, AST *prev, char **name)
+{
+   AST *field;
+   bt_metatype metatype;
+
+   /* name is checked before the store near the end of this function, so
+    * it has to be checked here as well; this store used to be
+    * unconditional and crashed for callers passing NULL */
+   if (name) *name = NULL;
+   if (!entry || !entry->down) return NULL; /* protect against empty entry */
+
+   metatype = entry->metatype;
+   if (metatype != BTE_MACRODEF && metatype != BTE_REGULAR)
+      return NULL;
+
+   if (prev == NULL)                    /* no previous field -- they must */
+   {                                    /* want the first one */
+      field = entry->down;
+      if (metatype == BTE_REGULAR && field->nodetype == BTAST_KEY)
+         field = field->right;          /* skip over citation key if present */
+   }
+   else                                 /* they really do want the next one */
+   {
+      field = prev->right;
+   }
+
+   if (!field) return NULL;             /* protect against field-less entry */
+   if (name) *name = field->text;
+   return field;
+} /* bt_next_field() */
+
+
+/* Same traversal as bt_next_field(), under another name: the arguments
+ * are passed through unchanged and its result is returned as is.
+ */
+AST *bt_next_macro (AST *entry, AST *prev, char **name)
+{
+   AST *macro = bt_next_field (entry, prev, name);
+
+   return macro;
+}
+
+
+/* Steps through the values below a field node, or below a comment or
+ * preamble entry.  With prev == NULL the first value is returned,
+ * otherwise the node to the right of prev.  Returns NULL when there is
+ * no such value, when top is NULL, or when top is any other kind of
+ * node.
+ *
+ * nodetype and text are both optional outputs.  They are first set to
+ * BTAST_BOGUS and NULL; when a value is found they receive its node type
+ * and its text (owned by the node).
+ */
+AST *bt_next_value (AST *top, AST *prev, bt_nodetype *nodetype, char **text)
+{
+   AST * value;
+   int   top_is_field;
+   int   top_is_text_entry;             /* comment or preamble entry */
+
+   if (nodetype) *nodetype = BTAST_BOGUS;
+   if (text) *text = NULL;
+
+   if (!top) return NULL;
+
+   top_is_field = (top->nodetype == BTAST_FIELD);
+   top_is_text_entry =
+      (top->nodetype == BTAST_ENTRY &&
+       (top->metatype == BTE_COMMENT || top->metatype == BTE_PREAMBLE));
+
+   /* nothing else has values to walk through */
+   if (!top_is_field && !top_is_text_entry)
+      return NULL;
+
+   /* first value if there is no previous one, else the one after it */
+   value = (prev == NULL) ? top->down : prev->right;
+   if (!value) return NULL;
+   if (nodetype) *nodetype = value->nodetype;
+
+   if (top->nodetype == BTAST_ENTRY && value->nodetype != BTAST_STRING)
+      internal_error ("found comment or preamble with non-string value");
+
+   if (text) *text = value->text;
+
+   return value;
+} /* bt_next_value() */
+
+
+/* Returns the fully post-processed text (BTO_FULL options) of a field
+ * node, or of a comment or preamble entry.  Returns NULL for a NULL node
+ * and for every other kind of node.
+ * NOTE(review): whether the caller owns the returned string is decided
+ * by bt_postprocess_field() / bt_postprocess_value() -- check there.
+ */
+char *bt_get_text (AST *node)
+{
+   ushort pp_options = BTO_FULL;        /* options for full processing: */
+                                        /* expand macros, paste strings, */
+                                        /* collapse whitespace */
+   bt_nodetype nt;
+   bt_metatype mt;
+
+   /* same NULL tolerance as bt_next_value(); a NULL node used to be
+    * dereferenced just below */
+   if (!node) return NULL;
+
+   nt = node->nodetype;
+   mt = node->metatype;
+
+   if (nt == BTAST_FIELD)
+   {
+#if DEBUG
+      char *value;
+
+      dump_ast ("bt_get_text (pre): node =\n", node);
+      value = bt_postprocess_field (node, pp_options, FALSE);
+      dump_ast ("bt_get_text (post): node =\n", node);
+      return value;
+#else
+      return bt_postprocess_field (node, pp_options, FALSE);
+#endif
+   }
+   else if (nt == BTAST_ENTRY && (mt == BTE_COMMENT || mt == BTE_PREAMBLE))
+   {
+      /* the values of a comment or preamble hang directly below the
+       * entry node */
+      return bt_postprocess_value (node->down, pp_options, FALSE);
+   }
+   else
+   {
+      return NULL;
+   }
+}
diff --git a/src/translators/btparse/util.c b/src/translators/btparse/util.c
new file mode 100644
index 0000000..1330176
--- /dev/null
+++ b/src/translators/btparse/util.c
@@ -0,0 +1,79 @@
+/* ------------------------------------------------------------------------
+@NAME : util.c
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Miscellaneous utility functions. So far, just:
+ strlwr
+ strupr
+@CREATED : Summer 1996, Greg Ward
+@MODIFIED :
+@VERSION : $Id: util.c,v 1.6 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
+
+ This file is part of the btparse library. This library is
+ free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include <string.h>
+#include <ctype.h>
+#include "prototypes.h"
+/*#include "my_dmalloc.h"*/
+
+/* ------------------------------------------------------------------------
+@NAME : strlwr()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Converts a string to lowercase in place.
+@GLOBALS :
+@CALLS :
+@CREATED : 1996/01/06, GPW
+@MODIFIED :
+@COMMENTS : This should work the same as strlwr() in DOS compilers --
+ why this isn't mandated by ANSI is a mystery to me...
+-------------------------------------------------------------------------- */
+#if !HAVE_STRLWR
+/* Lowercases the NUL-terminated string s in place and returns s. */
+char *strlwr (char *s)
+{
+   char *p;
+
+   /* The <ctype.h> functions require an argument representable as
+    * unsigned char (or EOF); a plain char may be negative, so convert
+    * it first.  Walking to the terminator also avoids storing the
+    * size_t result of strlen() in an int. */
+   for (p = s; *p != '\0'; p++)
+      *p = (char) tolower ((unsigned char) *p);
+
+   return s;
+}
+#endif
+
+
+
+/* ------------------------------------------------------------------------
+@NAME : strupr()
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Converts a string to uppercase in place.
+@GLOBALS :
+@CALLS :
+@CREATED : 1996/01/06, GPW
+@MODIFIED :
+@COMMENTS : This should work the same as strupr() in DOS compilers --
+ why this isn't mandated by ANSI is a mystery to me...
+-------------------------------------------------------------------------- */
+#if !HAVE_STRUPR
+/* Uppercases the NUL-terminated string s in place and returns s. */
+char *strupr (char *s)
+{
+   char *p;
+
+   /* The <ctype.h> functions require an argument representable as
+    * unsigned char (or EOF); a plain char may be negative, so convert
+    * it first.  Walking to the terminator also avoids storing the
+    * size_t result of strlen() in an int. */
+   for (p = s; *p != '\0'; p++)
+      *p = (char) toupper ((unsigned char) *p);
+
+   return s;
+}
+#endif