q3asm: look symbols up through a chained hash table.

Adds a small chained hash table (HASHTABSIZE buckets), routes duplicate
checks and symbol lookups through it, appends new symbols to the end of the
`symbols' list instead of insert-sorting them, replaces HashString with a
Kazlib-derived hash, and prints symbol/table statistics after assembly.

NOTE(review): indentation of the context lines is reconstructed (tabs
assumed); if it does not match q3asm.c.orig, apply with `patch -l'.

--- q3asm.c.orig	Wed Jan 23 02:54:37 2002
+++ q3asm.c	Wed Jan 23 04:09:58 2002
@@ -3,6 +3,9 @@
 #include "mathlib.h"
 #include "qfiles.h"
 
+/* 19079 total symbols in FI, 2002 Jan 23 */
+#define HASHTABSIZE 2048
+
 char	outputFilename[MAX_OS_PATH];
 
 // the zero page size is just used for detecting run time faults
@@ -125,6 +128,17 @@
 	int		value;
 } symbol_t;
 
+typedef struct hashchain_s {
+  void *data;
+  struct hashchain_s *next;
+} hashchain_t;
+
+typedef struct hashtable_s {
+  int buckets;
+  hashchain_t **table;
+} hashtable_t;
+
+hashtable_t *symtable;
 
 segment_t	segment[NUM_SEGMENTS];
 segment_t	*currentSegment;
@@ -134,8 +148,8 @@
 int		numSymbols;
 int		errorCount;
 
-symbol_t	*symbols;
-symbol_t	*lastSymbol;
+symbol_t	*symbols = 0;
+symbol_t	*lastSymbol = 0; /* Most recent symbol defined. */
 
 
 #define	MAX_ASM_FILES	256
@@ -178,11 +192,152 @@
 int		opcodesHash[ NUM_SOURCE_OPS ];
 
 
+
+/* Hash table */
+/* Set up H with `buckets' empty (zeroed) chains. */
+void
+hashtable_init (hashtable_t *H, int buckets)
+{
+  H->buckets = buckets;
+  H->table = calloc(H->buckets, sizeof(*(H->table)));
+  return;
+}
+
+/* Allocate a table with `buckets' chains and initialize it. */
+hashtable_t *
+hashtable_new (int buckets)
+{
+  hashtable_t *H;
+
+  H = malloc(sizeof(hashtable_t));
+  hashtable_init(H, buckets);
+  return H;
+}
+
+
+/* Append datum to the end of the chain selected by hashvalue % buckets.
+   hashvalue must be non-negative, or the bucket index goes negative. */
+void
+hashtable_add (hashtable_t *H, int hashvalue, void *datum)
+{
+  hashchain_t *hc, **hb;
+
+  hashvalue %= H->buckets;
+  hb = &(H->table[hashvalue]);
+  if (*hb == 0)
+    {
+      /* Empty bucket. Create new one. */
+      *hb = calloc(1, sizeof(**hb));
+      hc = *hb;
+    }
+  else
+    {
+      /* Get hc to point to last node in chain. */
+      for (hc = *hb; hc && hc->next; hc = hc->next);
+      hc->next = calloc(1, sizeof(*hc));
+      hc = hc->next;
+    }
+  hc->data = datum;
+  hc->next = 0;
+  return;
+}
+
+/* Return the head of the chain for hashvalue (0 if that bucket is empty). */
+hashchain_t *
+hashtable_get (hashtable_t *H, int hashvalue)
+{
+  hashvalue %= H->buckets;
+  return (H->table[hashvalue]);
+}
+
+/* Print bucket/node counts and chain-length statistics for H to stdout. */
+void
+hashtable_stats (hashtable_t *H)
+{
+  int len, empties, longest, nodes;
+  int i;
+  float meanlen;
+  hashchain_t *hc;
+
+  printf("Stats for hashtable %p", (void *)H);
+  empties = 0;
+  longest = 0;
+  nodes = 0;
+  for (i = 0; i < H->buckets; i++)
+    {
+      if (H->table[i] == 0)
+        { empties++; continue; }
+      for (hc = H->table[i], len = 0; hc; hc = hc->next, len++);
+      if (len > longest) { longest = len; }
+      nodes += len;
+    }
+  /* No nodes means every bucket is empty: skip the division by zero. */
+  meanlen = (nodes > 0) ? (float)(nodes) / (H->buckets - empties) : 0.0f;
+#if 0
+  printf(" Total buckets: %d\n", H->buckets);
+  printf(" Longest chain: %d\n", longest);
+  printf(" Empty chains: %d\n", empties);
+  printf(" Mean non-empty chain length: %f\n", meanlen);
+#else //0
+  printf(", %d buckets, %d nodes", H->buckets, nodes);
+  printf("\n");
+  printf(" Longest chain: %d, empty chains: %d, mean non-empty: %f", longest, empties, meanlen);
+#endif //0
+  printf("\n");
+}
+
+
+/* Kludge. */
+/* Check if symbol already exists. */
+/* Returns 0 if symbol does NOT already exist, non-zero otherwise. */
+int
+hashtable_symbol_exists (hashtable_t *H, int hash, char *sym)
+{
+  hashchain_t *hc;
+  symbol_t *s;
+
+  /* Pick the bucket with the reduced hash, but leave `hash' whole:
+     s->hash stores the full value, so it is compared unreduced. */
+  hc = H->table[hash % H->buckets];
+  if (hc == 0)
+    {
+      /* Empty chain means this symbol has not yet been defined. */
+      return 0;
+    }
+  for (; hc; hc = hc->next)
+    {
+      s = (symbol_t*)hc->data;
+      if ((hash == s->hash) && (strcmp(sym, s->name) == 0))
+        {
+          /* Symbol collisions -- symbol already exists. */
+          return 1;
+        }
+    }
+  return 0; /* Can't find collision. */
+}
+
+
+
+/* Placeholder comparator: always returns 0. */
+int
+symbol_cmp (symbol_t *a, symbol_t *b)
+{
+  return 0;
+}
+
+/* Placeholder: currently does nothing. */
+void
+sort_symbols ()
+{
+}
+
+
 /*
 =============
 HashString
 =============
 */
+#if 0
 int	HashString( char *s ) {
 	int		v;
 
@@ -192,6 +347,33 @@
 	}
 	return v;
 }
+#else //0
+/* Default hash function taken from Kazlib 1.19, slightly modified. */
+unsigned int HashString (const char *key)
+{
+  static unsigned long randbox[] = {
+    0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
+    0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
+    0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
+    0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
+  };
+
+  const char *str = key;
+  unsigned int acc = 0;
+
+  while (*str) {
+    acc ^= randbox[(*str + acc) & 0xf];
+    acc = (acc << 1) | (acc >> 31);
+    acc &= 0xffffffffU;
+    acc ^= randbox[((*str++ >> 4) + acc) & 0xf];
+    acc = (acc << 2) | (acc >> 30);
+    acc &= 0xffffffffU;
+  }
+  /* Mask instead of abs(): callers keep the hash in an int and use it as
+     a modulo index, and abs() of INT_MIN is undefined. */
+  return acc & 0x7fffffffU;
+}
+#endif //0
 
 
 /*
@@ -264,12 +446,19 @@
 
 	hash = HashString( sym );
 
+#if 0
 	for ( s = symbols ; s ; s = s->next ) {
 		if ( hash == s->hash && !strcmp( sym, s->name ) ) {
 			CodeError( "Multiple definitions for %s\n", sym );
 			return;
 		}
 	}
+#else //0
+	if (hashtable_symbol_exists(symtable, hash, sym)) {
+		CodeError( "Multiple definitions for %s\n", sym );
+		return;
+	}
+#endif //0
 
 	s = malloc( sizeof( *s ) );
 	s->name = copystring( sym );
@@ -277,6 +466,9 @@
 	s->value = value;
 	s->segment = currentSegment;
 
+	hashtable_add(symtable, hash, s);
+
+#if 0
 	lastSymbol = s;	/* for the move-to-lit-segment byteswap hack */
 
 	// insert it in order
@@ -290,6 +482,17 @@
 	}
 	s->next = after->next;
 	after->next = s;
+#else //0
+	/* hashtable lookup speeds up symbol lookup enormously. I see no reason why the straightforward symbols list needs to be sorted as well. */
+	/* Since we're not insert-sorting the symbols list, lastSymbol should be pointing to the last valid symbol in the list (i.e. the end of list). */
+	s->next = 0;	/* malloc() does not zero s, and list walkers stop on a null next. */
+	if (symbols == 0) {
+		symbols = s;
+	} else {
+		lastSymbol->next = s;
+	}
+	lastSymbol = s;
+#endif //0
 }
 
 
@@ -304,6 +507,7 @@
 	symbol_t	*s;
 	char		expanded[MAX_LINE_LENGTH];
 	int			hash;
+	hashchain_t	*hc;
 
 	if ( passNumber == 0 ) {
 		return 0;
@@ -316,11 +520,21 @@
 	}
 
 	hash = HashString( sym );
+#if 0
 	for ( s = symbols ; s ; s = s->next ) {
 		if ( hash == s->hash && !strcmp( sym, s->name ) ) {
 			return s->segment->segmentBase + s->value;
 		}
 	}
+#else
+	/* Sped-up lookup with symbol hash table. hopefully. */
+	for (hc = hashtable_get(symtable, hash); hc; hc = hc->next) {
+		s = (symbol_t*)hc->data;
+		if ( hash == s->hash && !strcmp( sym, s->name ) ) {
+			return s->segment->segmentBase + s->value;
+		}
+	}
+#endif //0
 
 	CodeError( "ERROR: symbol %s undefined\n", sym );
 	passNumber = 0;
@@ -737,10 +951,13 @@
 		v2 = ParseValue();
 
 		if ( v == 1 ) {
+/* Character (1-byte) values go into lit(eral) segment. */
 			HackToSegment( LITSEG );
 		} else if ( v == 4 ) {
+/* 32-bit (4-byte) values go into data segment. */
 			HackToSegment( DATASEG );
 		} else if ( v == 2 ) {
+/* and 16-bit (2-byte) values will cause q3asm to barf. */
 			CodeError( "16 bit initialized data not supported" );
 		}
 
@@ -780,6 +997,8 @@
 	for ( i = 0 ; i < NUM_SOURCE_OPS ; i++ ) {
 		opcodesHash[i] = HashString( sourceOps[i].name );
 	}
+
+	symtable = hashtable_new(HASHTABSIZE);
 }
 
 
@@ -905,6 +1124,11 @@
 		for ( i = 0 ; i < NUM_SEGMENTS ; i++ ) {
 			segment[i].imageUsed = (segment[i].imageUsed + 3) & ~3;
 		}
+
+/* Pass 0 finished, sort the symbols. Or do we need to? */
+		if (passNumber == 0) {
+			sort_symbols();
+		}
 	}
 
 	// reserve the stack in bss
@@ -1007,6 +1231,14 @@
 	}
 
 	Assemble();
+
+	{
+		symbol_t *s;
+
+		for ( i = 0, s = symbols ; s ; s = s->next, i++ ) /* nop */ ;
+		printf("%d symbols defined\n", i);
+		hashtable_stats(symtable);
+	}
 
 	end = I_FloatTime ();
 	printf ("%5.0f seconds elapsed\n", end-start);