--- q3asm.c.orig	Wed Jan 23 02:54:37 2002
+++ q3asm.c	Wed Jan 23 04:09:58 2002
@@ -3,6 +3,9 @@
 #include "mathlib.h"
 #include "qfiles.h"
 
+/* 19079 total symbols in FI, 2002 Jan 23 */
+#define HASHTABSIZE 2048
+
 char	outputFilename[MAX_OS_PATH];
 
 // the zero page size is just used for detecting run time faults
@@ -125,6 +128,17 @@
 	int		value;
 } symbol_t;
 
+typedef struct hashchain_s {
+  void *data;
+  struct hashchain_s *next;
+} hashchain_t;
+
+typedef struct hashtable_s {
+  int buckets;
+  hashchain_t **table;
+} hashtable_t;
+
+hashtable_t *symtable;
 
 segment_t	segment[NUM_SEGMENTS];
 segment_t	*currentSegment;
@@ -134,8 +148,8 @@
 int		numSymbols;
 int		errorCount;
 
-symbol_t	*symbols;
-symbol_t	*lastSymbol;
+symbol_t	*symbols = 0;
+symbol_t	*lastSymbol = 0;  /* Most recent symbol defined. */
 
 
 #define	MAX_ASM_FILES	256
@@ -178,11 +192,142 @@
 int		opcodesHash[ NUM_SOURCE_OPS ];
 
 
+
+/* Hash table */
+void
+hashtable_init (hashtable_t *H, int buckets)
+{
+  H->buckets = buckets;
+  H->table = calloc(H->buckets, sizeof(*(H->table)));
+  return;
+}
+
+hashtable_t *
+hashtable_new (int buckets)
+{
+  hashtable_t *H;
+
+  H = malloc(sizeof(hashtable_t));
+  hashtable_init(H, buckets);
+  return H;
+}
+
+
+void
+hashtable_add (hashtable_t *H, int hashvalue, void *datum)
+{
+  hashchain_t *hc, **hb;
+
+  hashvalue %= H->buckets;
+  hb = &(H->table[hashvalue]);
+  if (*hb == 0)
+    {
+      /* Empty bucket.  Create new one. */
+      *hb = calloc(1, sizeof(**hb));
+      hc = *hb;
+    }
+  else
+    {
+      /* Get hc to point to last node in chain. */
+      for (hc = *hb; hc && hc->next; hc = hc->next);
+      hc->next = calloc(1, sizeof(*hc));
+      hc = hc->next;
+    }
+  hc->data = datum;
+  hc->next = 0;
+  return;
+}
+
+hashchain_t *
+hashtable_get (hashtable_t *H, int hashvalue)
+{
+  hashvalue %= H->buckets;
+  return (H->table[hashvalue]);
+}
+
+void
+hashtable_stats (hashtable_t *H)
+{
+  int len, empties, longest, nodes;
+  int i;
+  float meanlen;
+  hashchain_t *hc;
+
+  printf("Stats for hashtable %08X", H);
+  empties = 0;
+  longest = 0;
+  nodes = 0;
+  for (i = 0; i < H->buckets; i++)
+    {
+      if (H->table[i] == 0)
+        { empties++; continue; }
+      for (hc = H->table[i], len = 0; hc; hc = hc->next, len++);
+      if (len > longest) { longest = len; }
+      nodes += len;
+    }
+  meanlen = (float)(nodes) / (H->buckets - empties);
+#if 0
+  printf(" Total buckets: %d\n", H->buckets);
+  printf(" Longest chain: %d\n", longest);
+  printf(" Empty chains: %d\n", empties);
+  printf(" Mean non-empty chain length: %f\n", meanlen);
+#else //0
+  printf(", %d buckets, %d nodes", H->buckets, nodes);
+  printf("\n");
+  printf(" Longest chain: %d, empty chains: %d, mean non-empty: %f", longest, empties, meanlen);
+#endif //0
+  printf("\n");
+}
+
+
+/* Kludge. */
+/* Check if symbol already exists. */
+/* Returns 0 if symbols does NOT already exist, non-zero otherwise. */
+int
+hashtable_symbol_exists (hashtable_t *H, int hash, char *sym)
+{
+  hashchain_t *hc;
+  symbol_t *s;
+
+  hash %= H->buckets;
+  hc = H->table[hash];
+  if (hc == 0)
+    {
+      /* Empty chain means this symbols has not yet been defined. */
+      return 0;
+    }
+  for (; hc; hc = hc->next)
+    {
+      s = (symbol_t*)hc->data;
+      if ((hash == s->hash) && (strcmp(sym, s->name) == 0))
+        {
+          /* Symbol collisions -- symbol already exists. */
+          return 1;
+        }
+    }
+  return 0;  /* Can't find collision. */
+}
+
+
+
+int
+symbol_cmp (symbol_t *a, symbol_t *b)
+{
+  return 0;
+}
+
+void
+sort_symbols ()
+{
+}
+
+
 /*
 =============
 HashString
 =============
 */
+#if 0
 int	HashString( char *s ) {
 	int		v;
 
@@ -192,6 +337,31 @@
 	}
 	return v;
 }
+#else //0
+/* Default hash function taken of Kazlib 1.19, slightly modified. */
+unsigned int HashString (const char *key)
+{
+    static unsigned long randbox[] = {
+    0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
+    0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
+    0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
+    0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
+    };
+
+    const char *str = key;
+    unsigned int acc = 0;
+
+    while (*str) {
+    acc ^= randbox[(*str + acc) & 0xf];
+    acc = (acc << 1) | (acc >> 31);
+    acc &= 0xffffffffU;
+    acc ^= randbox[((*str++ >> 4) + acc) & 0xf];
+    acc = (acc << 2) | (acc >> 30);
+    acc &= 0xffffffffU;
+    }
+    return abs(acc);
+}
+#endif //0
 
 
 /*
@@ -264,12 +434,19 @@
 
 	hash = HashString( sym );
 
+#if 0
 	for ( s = symbols ; s ; s = s->next ) {
 		if ( hash == s->hash && !strcmp( sym, s->name ) ) {
 			CodeError( "Multiple definitions for %s\n", sym );
 			return;
 		}
 	}
+#else //0
+	if (hashtable_symbol_exists(symtable, hash, sym)) {
+		CodeError( "Multiple definitions for %s\n", sym );
+		return;
+	}
+#endif //0
 
 	s = malloc( sizeof( *s ) );
 	s->name = copystring( sym );
@@ -277,6 +454,9 @@
 	s->value = value;
 	s->segment = currentSegment;
 
+	hashtable_add(symtable, hash, s);
+
+#if 0
 	lastSymbol = s;	/* for the move-to-lit-segment byteswap hack */
 
 	// insert it in order
@@ -290,6 +470,16 @@
 	}
 	s->next = after->next;
 	after->next = s;
+#else //0
+  /* hashtable lookup speeds up symbol lookup enormously.  I see no reason why the straightforward symbols list needs to be sorted as well. */
+  /* Since we're not insert-sorting the symbols list, lastSymbol should be pointing to the last valid symbol in the list (i.e. the end of list). */
+	if (symbols == 0) {
+		lastSymbol = symbols = s;
+	} else {
+		lastSymbol->next = s;
+		lastSymbol = s;
+	}
+#endif //0
 }
 
 
@@ -304,6 +494,7 @@
 	symbol_t	*s;
 	char		expanded[MAX_LINE_LENGTH];
 	int			hash;
+  hashchain_t *hc;
 
 	if ( passNumber == 0 ) {
 		return 0;
@@ -316,11 +507,21 @@
 	}
 
 	hash = HashString( sym );
+#if 0
 	for ( s = symbols ; s ; s = s->next ) {
 		if ( hash == s->hash && !strcmp( sym, s->name ) ) {
 			return s->segment->segmentBase + s->value;
 		}
 	}
+#else
+  /* Sped-up lookup with symbol hash table.  hopefully. */
+	for (hc = hashtable_get(symtable, hash); hc; hc = hc->next) {
+		s = (symbol_t*)hc->data;
+		if ( hash == s->hash && !strcmp( sym, s->name ) ) {
+			return s->segment->segmentBase + s->value;
+		}
+	}
+#endif //0
 
 	CodeError( "ERROR: symbol %s undefined\n", sym );
 	passNumber = 0;
@@ -737,10 +938,13 @@
 		v2 = ParseValue();
 
 		if ( v == 1 ) {
+/* Character (1-byte) values go into lit(eral) segment. */
 			HackToSegment( LITSEG );
 		} else if ( v == 4 ) {
+/* 32-bit (4-byte) values go into data segment. */
 			HackToSegment( DATASEG );
 		} else if ( v == 2 ) {
+/* and 16-bit (2-byte) values will cause q3asm to barf. */
 			CodeError( "16 bit initialized data not supported" );
 		}
 
@@ -780,6 +984,8 @@
 	for ( i = 0 ; i < NUM_SOURCE_OPS ; i++ ) {
 		opcodesHash[i] = HashString( sourceOps[i].name );
 	}
+
+	symtable = hashtable_new(HASHTABSIZE);
 }
 
 
@@ -905,6 +1111,11 @@
 		for ( i = 0 ; i < NUM_SEGMENTS ; i++ ) {
 			segment[i].imageUsed = (segment[i].imageUsed + 3) & ~3;
 		}
+
+/* Pass 0 finished, sort the symbols.  Or do we need to? */
+		if (passNumber == 0) {
+			sort_symbols();
+		}
 	}
 
 	// reserve the stack in bss
@@ -1007,6 +1218,14 @@
 	}
 
 	Assemble();
+
+	{
+		symbol_t *s;
+
+		for ( i = 0, s = symbols ; s ; s = s->next, i++ ) /* nop */ ;
+		printf("%d symbols defined\n", i);
+		hashtable_stats(symtable);
+	}
 
 	end = I_FloatTime ();
 	printf ("%5.0f seconds elapsed\n", end-start);
