--- q3asm.c.orig Wed Jan 23 02:54:37 2002 +++ q3asm.c Wed Jan 23 03:00:31 2002 @@ -125,6 +125,19 @@ int value; } symbol_t; +typedef struct hashchain_s { + void *data; + struct hashchain_s *next; +} hashchain_t; + +typedef struct hashtable_s { + int buckets; + hashchain_t **table; +} hashtable_t; + +/* 19603 total symbols in FI, 2002.01.23 */ +#define HASHTABSIZE 2048; +hashtable_t *symtable; segment_t segment[NUM_SEGMENTS]; segment_t *currentSegment; @@ -135,7 +148,7 @@ int errorCount; symbol_t *symbols; -symbol_t *lastSymbol; +symbol_t *lastSymbol; /* Most recent symbol defined. */ #define MAX_ASM_FILES 256 @@ -178,13 +191,143 @@ int opcodesHash[ NUM_SOURCE_OPS ]; + +/* Hash table */ +void +hashtable_init (hashtable_t *H, int buckets) +{ + H->buckets = buckets; + H->table = calloc(H->buckets, sizeof(*(H->table))); + return; +} + +hashtable_t * +hashtable_new (int buckets) +{ + hashtable_t *H; + + H = malloc(sizeof(hashtable_t)); + hashtable_init(H, buckets); + return H; +} + + +void +hashtable_add (hashtable_t *H, int hashvalue, void *datum) +{ + hashchain_t *hc, **hb; + + hashvalue %= H->buckets; + hb = &(H->table[hashvalue]); + if (*hb == 0) + { + /* Empty bucket. Create new one. */ + *hb = calloc(1, sizeof(**hb)); + hc = *hb; + } + else + { + /* Get hc to point to last node in chain. */ + for (hc = *hb; hc && hc->next; hc = hc->next); + hc->next = calloc(1, sizeof(*hc)); + hc = hc->next; + } + hc->data = datum; + hc->next = 0; + return; +} + +hashchain_t * +hashtable_get (hashtable_t *H, int hashvalue) +{ + hashvalue %= H->buckets; + return (H->table[hashvalue]); +} + +void +hashtable_stats (hashtable_t *H) +{ + int sum, total; + int len, empties, longest, nodes; + int i, j; + float meanlen; + hashchain_t *hc; + + printf("Stats for hashtable %08X, %d buckets\n", H, H->buckets); + empties = 0; + longest = 0; + nodes = 0; + for (i = 0; i < H->buckets; i++) + { + if (H->table[i] == 0) + { empties++; continue; } + for (hc = H->table[i], len = 0; hc; hc = hc->next, len++); + if (len > longest) { longest = len; } + nodes += len; + } + meanlen = (float)(nodes) / (H->buckets - empties); +#if 0 + printf(" Total buckets: %d\n", H->buckets); + printf(" Longest chain: %d\n", longest); + printf(" Empty chains: %d\n", empties); + printf(" Mean non-empty chain length: %f\n", meanlen); +#else //0 + printf(" Longest chain: %d, empty chains: %d, mean non-empty: %f", longest, empties, meanlen); +#endif //0 + printf("\n"); +} + + +/* Kludge. */ +/* Check if symbol already exists. */ +/* Returns 0 if symbols does NOT already exist, non-zero otherwise. */ +int +hashtable_symbol_exists (hashtable_t *H, int hash, char *sym) +{ + int i; + hashchain_t *hc; + symbol_t *s; + + hash %= H->buckets; + hc = H->table[hash]; + if (hc == 0) + { + /* Empty chain means this symbols has not yet been defined. */ + return 0; + } + for (; hc; hc = hc->next) + { + s = (symbol_t*)hc->data; + if ((hash == s->hash) && (strcmp(sym, s->name) == 0)) + { + /* Symbol collisions -- symbol already exists. */ + return 1; + } + } + return 0; /* Can't find collision. */ +} + + + +int +symbol_cmp (symbol_t *a, symbol_t *b) +{ +} + +void +sort_symbols () +{ +} + + /* ============= HashString ============= */ +#if 0 int HashString( char *s ) { - int v; + int v = 0; while ( *s ) { v += *s; @@ -192,6 +335,31 @@ } return v; } +#else //0 +/* Default hash function taken of Kazlib 1.19, slightly modified. */ +unsigned int HashString (const char *key) +{ + static unsigned long randbox[] = { + 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U, + 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU, + 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU, + 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU, + }; + + const unsigned char *str = key; + unsigned int acc = 0; + + while (*str) { + acc ^= randbox[(*str + acc) & 0xf]; + acc = (acc << 1) | (acc >> 31); + acc &= 0xffffffffU; + acc ^= randbox[((*str++ >> 4) + acc) & 0xf]; + acc = (acc << 2) | (acc >> 30); + acc &= 0xffffffffU; + } + return abs(acc); +} +#endif //0 /* @@ -264,12 +432,19 @@ hash = HashString( sym ); +#if 0 for ( s = symbols ; s ; s = s->next ) { if ( hash == s->hash && !strcmp( sym, s->name ) ) { CodeError( "Multiple definitions for %s\n", sym ); return; } } +#else //0 + if (hashtable_symbol_exists(symtable, hash, sym)) { + CodeError( "Multiple definitions for %s\n", sym ); + return; + } +#endif //0 s = malloc( sizeof( *s ) ); s->name = copystring( sym ); @@ -277,8 +452,11 @@ s->value = value; s->segment = currentSegment; + hashtable_add(symtable, hash, s); + lastSymbol = s; /* for the move-to-lit-segment byteswap hack */ +#if 0 // insert it in order if ( !symbols || s->value < symbols->value ) { s->next = symbols; @@ -290,6 +468,16 @@ } s->next = after->next; after->next = s; +#else //0 + /* hashtable lookup speeds up symbol lookup enormously. I see no reason why the straightforward symbols list needs to be sorted as well. */ + /* Since we're not insert-sorting the symbols list, lastSymbol should be pointing to the last valid symbol in the list (i.e. the end of list). */ + if (symbols == 0) { + lastSymbol = symbols = s; + } else { + lastSymbol->next = s; + lastSymbol = s; + } +#endif //0 } @@ -304,6 +492,7 @@ symbol_t *s; char expanded[MAX_LINE_LENGTH]; int hash; + hashchain_t *hc; if ( passNumber == 0 ) { return 0; @@ -316,11 +505,22 @@ } hash = HashString( sym ); +#if 0 for ( s = symbols ; s ; s = s->next ) { if ( hash == s->hash && !strcmp( sym, s->name ) ) { return s->segment->segmentBase + s->value; } } +#else + /* Sped-up lookup with symbol hash table. hopefully. */ +// for ( s = (symbol_t*)hashtable_get(symtable, hash); s; s = s->next) { + for (hc = hashtable_get(symtable, hash); hc; hc = hc->next) { + s = (symbol_t*)hc->data; + if ( hash == s->hash && !strcmp( sym, s->name ) ) { + return s->segment->segmentBase + s->value; + } + } +#endif //0 CodeError( "ERROR: symbol %s undefined\n", sym ); passNumber = 0; @@ -737,10 +937,13 @@ v2 = ParseValue(); if ( v == 1 ) { +/* Character (1-byte) values go into lit(eral) segment. */ HackToSegment( LITSEG ); } else if ( v == 4 ) { +/* 32-bit (4-byte) values go into data segment. */ HackToSegment( DATASEG ); } else if ( v == 2 ) { +/* and 16-bit (2-byte) values will cause q3asm to barf. */ CodeError( "16 bit initialized data not supported" ); } @@ -780,6 +983,8 @@ for ( i = 0 ; i < NUM_SOURCE_OPS ; i++ ) { opcodesHash[i] = HashString( sourceOps[i].name ); } + + symtable = hashtable_new(HASHTABSIZE); } @@ -905,6 +1110,11 @@ for ( i = 0 ; i < NUM_SEGMENTS ; i++ ) { segment[i].imageUsed = (segment[i].imageUsed + 3) & ~3; } + +/* Pass 0 finished, sort the symbols. Or do we need to? */ + if (passNumber == 0) { + sort_symbols(); + } } // reserve the stack in bss @@ -1007,6 +1217,13 @@ } Assemble(); + + { + symbol_t *s; + for(i=0,s=symbols;s;s=s->next,i++); + printf("%d symbols defined\n", i); + hashtable_stats(symtable); + } end = I_FloatTime (); printf ("%5.0f seconds elapsed\n", end-start);