12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483 |
- #ifndef _OBJC_SELOPT_H
- #define _OBJC_SELOPT_H
- #include <stdint.h>
- #include <stdlib.h>
- #ifdef SELOPT_WRITE
- #include <unordered_map>
- #endif
- #ifndef STATIC_ASSERT
- # define STATIC_ASSERT(x) _STATIC_ASSERT2(x, __LINE__)
- # define _STATIC_ASSERT2(x, line) _STATIC_ASSERT3(x, line)
- # define _STATIC_ASSERT3(x, line) \
- typedef struct { \
- int _static_assert[(x) ? 0 : -1]; \
- } _static_assert_ ## line __attribute__((unavailable))
- #endif
- #define SELOPT_DEBUG 0
- #define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x)
- #define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x)
- namespace objc_opt {
- typedef int32_t objc_stringhash_offset_t;
- typedef uint8_t objc_stringhash_check_t;
- static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level);
- #ifdef SELOPT_WRITE
// Result of building a perfect hash (see make_perfect()).
// The object owns the heap array `tab`; when non-null it is expected to hold
// mask+1 bytes (make_perfect() allocates blen bytes and stores mask = blen-1).
struct __attribute__((packed)) perfect_hash {
    uint32_t capacity;
    uint32_t occupied;
    uint32_t shift;
    uint32_t mask;
    uint64_t salt;
    uint32_t scramble[256];
    uint8_t *tab;   // owned; released with delete[] by the destructor

    perfect_hash() : tab(0) { }

    // Deep copy. The struct is returned by value from make_perfect(); without
    // this, a copy that is not elided would share `tab` with its source and
    // both destructors would delete[] the same array.
    perfect_hash(const perfect_hash& other) : tab(0) { assign(other); }

    perfect_hash& operator=(const perfect_hash& other) {
        if (this != &other) assign(other);
        return *this;
    }

    // delete[] on a null pointer is a no-op, so no guard is needed.
    ~perfect_hash() { delete[] tab; }

private:
    // Replace this object's contents with a deep copy of `other`.
    void assign(const perfect_hash& other) {
        uint8_t *fresh = 0;
        if (other.tab) {
            size_t count = (size_t)other.mask + 1;
            fresh = new uint8_t[count];
            for (size_t i = 0; i < count; i++) {
                fresh[i] = other.tab[i];
            }
        }
        // Release the old array only after the new one exists.
        delete[] tab;
        tab = fresh;

        capacity = other.capacity;
        occupied = other.occupied;
        shift    = other.shift;
        mask     = other.mask;
        salt     = other.salt;
        for (uint32_t i = 0; i < 256; i++) {
            scramble[i] = other.scramble[i];
        }
    }
};
// Equality functor for C-string map keys: keys are equal when strcmp()
// reports identical contents, regardless of pointer identity.
struct eqstr {
    bool operator()(const char* lhs, const char* rhs) const {
        return !strcmp(lhs, rhs);
    }
};
- struct hashstr {
- size_t operator()(const char *s) const {
- return (size_t)lookup8((uint8_t *)s, strlen(s), 0);
- }
- };
- typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> string_map;
- typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> protocol_map;
- typedef std::unordered_multimap<const char *, std::pair<uint64_t, uint64_t>, hashstr, eqstr> class_map;
- static perfect_hash make_perfect(const string_map& strings);
- #endif
- struct __attribute__((packed)) objc_stringhash_t {
- uint32_t capacity;
- uint32_t occupied;
- uint32_t shift;
- uint32_t mask;
- uint32_t unused1;
- uint32_t unused2;
- uint64_t salt;
- uint32_t scramble[256];
- uint8_t tab[0];
-
-
- objc_stringhash_check_t *checkbytes() { return (objc_stringhash_check_t *)&tab[mask+1]; }
- const objc_stringhash_check_t *checkbytes() const { return (const objc_stringhash_check_t *)&tab[mask+1]; }
- objc_stringhash_offset_t *offsets() { return (objc_stringhash_offset_t *)&checkbytes()[capacity]; }
- const objc_stringhash_offset_t *offsets() const { return (const objc_stringhash_offset_t *)&checkbytes()[capacity]; }
- uint32_t hash(const char *key, size_t keylen) const
- {
- uint64_t val = lookup8((uint8_t*)key, keylen, salt);
- uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]];
- return index;
- }
- uint32_t hash(const char *key) const
- {
- return hash(key, strlen(key));
- }
-
-
-
-
- objc_stringhash_check_t checkbyte(const char *key, size_t keylen) const
- {
- return
- ((key[0] & 0x7) << 5)
- |
- ((uint8_t)keylen & 0x1f);
- }
- objc_stringhash_check_t checkbyte(const char *key) const
- {
- return checkbyte(key, strlen(key));
- }
- #define INDEX_NOT_FOUND (~(uint32_t)0)
- uint32_t getIndex(const char *key) const
- {
- size_t keylen = strlen(key);
- uint32_t h = hash(key, keylen);
-
- objc_stringhash_check_t h_check = checkbytes()[h];
- objc_stringhash_check_t key_check = checkbyte(key, keylen);
- bool check_fail = (h_check != key_check);
- #if ! SELOPT_DEBUG
- if (check_fail) return INDEX_NOT_FOUND;
- #endif
- objc_stringhash_offset_t offset = offsets()[h];
- if (offset == 0) return INDEX_NOT_FOUND;
- const char *result = (const char *)this + offset;
- if (0 != strcmp(key, result)) return INDEX_NOT_FOUND;
- #if SELOPT_DEBUG
- if (check_fail) abort();
- #endif
- return h;
- }
- #ifdef SELOPT_WRITE
- size_t size()
- {
- return sizeof(objc_stringhash_t)
- + mask+1
- + capacity * sizeof(objc_stringhash_check_t)
- + capacity * sizeof(objc_stringhash_offset_t);
- }
- void byteswap(bool little_endian)
- {
-
- for (uint32_t i = 0; i < 256; i++) {
- S32(scramble[i]);
- }
- objc_stringhash_offset_t *o = offsets();
- for (uint32_t i = 0; i < capacity; i++) {
- S32(o[i]);
- }
-
- S32(capacity);
- S32(occupied);
- S32(shift);
- S32(mask);
- S64(salt);
- }
- const char *write(uint64_t base, size_t remaining, string_map& strings)
- {
- if (sizeof(objc_stringhash_t) > remaining) {
- return "selector section too small (metadata not optimized)";
- }
- if (strings.size() == 0) {
- bzero(this, sizeof(objc_stringhash_t));
- return NULL;
- }
-
- perfect_hash phash = make_perfect(strings);
- if (phash.capacity == 0) {
- return "perfect hash failed (metadata not optimized)";
- }
-
- capacity = phash.capacity;
- occupied = phash.occupied;
- shift = phash.shift;
- mask = phash.mask;
- unused1 = 0;
- unused2 = 0;
- salt = phash.salt;
- if (size() > remaining) {
- return "selector section too small (metadata not optimized)";
- }
-
-
- for (uint32_t i = 0; i < 256; i++) {
- scramble[i] = phash.scramble[i];
- }
- for (uint32_t i = 0; i < phash.mask+1; i++) {
- tab[i] = phash.tab[i];
- }
-
-
- for (uint32_t i = 0; i < phash.capacity; i++) {
- offsets()[i] = 0;
- }
-
- for (uint32_t i = 0; i < phash.capacity; i++) {
- checkbytes()[i] = 0;
- }
-
-
- # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
- string_map::const_iterator s;
- for (s = strings.begin(); s != strings.end(); ++s) {
- int64_t offset = s->second - base;
- if ((offset<<SHIFT)>>SHIFT != offset) {
- return "selector offset too big (metadata not optimized)";
- }
- uint32_t h = hash(s->first);
- offsets()[h] = (objc_stringhash_offset_t)offset;
- checkbytes()[h] = checkbyte(s->first);
- }
- # undef SHIFT
-
- return NULL;
- }
- #endif
- };
- struct objc_selopt_t : objc_stringhash_t {
- const char *get(const char *key) const
- {
- uint32_t h = getIndex(key);
- if (h == INDEX_NOT_FOUND) return NULL;
-
- return (const char *)this + offsets()[h];
- }
- };
- struct objc_classheader_t {
- objc_stringhash_offset_t clsOffset;
- objc_stringhash_offset_t hiOffset;
-
-
-
- bool isDuplicate() const { return clsOffset & 1; }
- uint32_t duplicateCount() const { return clsOffset >> 1; }
- uint32_t duplicateIndex() const { return hiOffset; }
- };
- struct objc_clsopt_t : objc_stringhash_t {
-
-
-
-
- objc_classheader_t *classOffsets() { return (objc_classheader_t *)&offsets()[capacity]; }
- const objc_classheader_t *classOffsets() const { return (const objc_classheader_t *)&offsets()[capacity]; }
-
- uint32_t& duplicateCount() { return *(uint32_t *)&classOffsets()[capacity]; }
- const uint32_t& duplicateCount() const { return *(const uint32_t *)&classOffsets()[capacity]; }
- objc_classheader_t *duplicateOffsets() { return (objc_classheader_t *)(&duplicateCount()+1); }
- const objc_classheader_t *duplicateOffsets() const { return (const objc_classheader_t *)(&duplicateCount()+1); }
-
-
-
- uint32_t getClassAndHeader(const char *key, void*& cls, void*& hi) const
- {
- uint32_t h = getIndex(key);
- if (h == INDEX_NOT_FOUND) {
- cls = NULL;
- hi = NULL;
- return 0;
- }
- const objc_classheader_t& clshi = classOffsets()[h];
- if (! clshi.isDuplicate()) {
-
- cls = (void *)((const char *)this + clshi.clsOffset);
- hi = (void *)((const char *)this + clshi.hiOffset);
- return 1;
- }
- else {
-
- cls = NULL;
- hi = NULL;
- return clshi.duplicateCount();
- }
- }
- void getClassesAndHeaders(const char *key, void **cls, void **hi) const
- {
- uint32_t h = getIndex(key);
- if (h == INDEX_NOT_FOUND) return;
- const objc_classheader_t& clshi = classOffsets()[h];
- if (! clshi.isDuplicate()) {
-
- cls[0] = (void *)((const char *)this + clshi.clsOffset);
- hi[0] = (void *)((const char *)this + clshi.hiOffset);
- }
- else {
-
- uint32_t count = clshi.duplicateCount();
- const objc_classheader_t *list =
- &duplicateOffsets()[clshi.duplicateIndex()];
- for (uint32_t i = 0; i < count; i++) {
- cls[i] = (void *)((const char *)this + list[i].clsOffset);
- hi[i] = (void *)((const char *)this + list[i].hiOffset);
- }
- }
- }
- #ifdef SELOPT_WRITE
- size_t size()
- {
- return
- objc_stringhash_t::size()
- + capacity * sizeof(objc_classheader_t)
- + sizeof(duplicateCount())
- + duplicateCount() * sizeof(objc_classheader_t);
- }
- void byteswap(bool little_endian)
- {
- objc_classheader_t *o;
-
- o = classOffsets();
- for (uint32_t i = 0; i < capacity; i++) {
- S32(o[i].clsOffset);
- S32(o[i].hiOffset);
- }
- o = duplicateOffsets();
- for (uint32_t i = 0; i < duplicateCount(); i++) {
- S32(o[i].clsOffset);
- S32(o[i].hiOffset);
- }
- S32(duplicateCount());
- objc_stringhash_t::byteswap(little_endian);
- }
-
- const char *write(uint64_t base, size_t remaining,
- string_map& strings, class_map& classes, bool verbose)
- {
- const char *err;
- err = objc_stringhash_t::write(base, remaining, strings);
- if (err) return err;
- if (size() > remaining) {
- return "selector section too small (metadata not optimized)";
- }
-
- for (uint32_t i = 0; i < capacity; i++) {
- classOffsets()[i].clsOffset = 0;
- classOffsets()[i].hiOffset = 0;
- }
-
-
- # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
- class_map::const_iterator c;
- for (c = classes.begin(); c != classes.end(); ++c) {
- uint32_t h = getIndex(c->first);
- if (h == INDEX_NOT_FOUND) {
- return "class list busted (metadata not optimized)";
- }
- if (classOffsets()[h].clsOffset != 0) {
-
- continue;
- }
- uint32_t count = (uint32_t)classes.count(c->first);
- if (count == 1) {
-
- int64_t coff = c->second.first - base;
- int64_t hoff = c->second.second - base;
- if ((coff<<SHIFT)>>SHIFT != coff) {
- return "class offset too big (metadata not optimized)";
- }
- if ((hoff<<SHIFT)>>SHIFT != hoff) {
- return "header offset too big (metadata not optimized)";
- }
- classOffsets()[h].clsOffset = (objc_stringhash_offset_t)coff;
- classOffsets()[h].hiOffset = (objc_stringhash_offset_t)hoff;
- }
- else {
-
- if (verbose) {
- fprintf(stderr, "update_dyld_shared_cache: %u duplicates of Objective-C class %s\n", count, c->first);
- }
- uint32_t dest = duplicateCount();
- duplicateCount() += count;
- if (size() > remaining) {
- return "selector section too small (metadata not optimized)";
- }
-
- classOffsets()[h].clsOffset = count*2 + 1;
- classOffsets()[h].hiOffset = dest;
- std::pair<class_map::const_iterator, class_map::const_iterator>
- duplicates = classes.equal_range(c->first);
- class_map::const_iterator dup;
- for (dup = duplicates.first; dup != duplicates.second; ++dup) {
- int64_t coff = dup->second.first - base;
- int64_t hoff = dup->second.second - base;
- if ((coff<<SHIFT)>>SHIFT != coff) {
- return "class offset too big (metadata not optimized)";
- }
- if ((hoff<<SHIFT)>>SHIFT != hoff) {
- return "header offset too big (metadata not optimized)";
- }
-
- duplicateOffsets()[dest].clsOffset = (objc_stringhash_offset_t)coff;
- duplicateOffsets()[dest].hiOffset = (objc_stringhash_offset_t)hoff;
- dest++;
- }
- }
- }
- # undef SHIFT
-
- return NULL;
- }
- #endif
- };
- struct objc_protocolopt_t : objc_stringhash_t {
-
-
- objc_stringhash_offset_t *protocolOffsets() { return (objc_stringhash_offset_t *)&offsets()[capacity]; }
- const objc_stringhash_offset_t *protocolOffsets() const { return (const objc_stringhash_offset_t *)&offsets()[capacity]; }
- void* getProtocol(const char *key) const
- {
- uint32_t h = getIndex(key);
- if (h == INDEX_NOT_FOUND) {
- return NULL;
- }
- return (void *)((const char *)this + protocolOffsets()[h]);
- }
- #ifdef SELOPT_WRITE
- size_t size()
- {
- return
- objc_stringhash_t::size() + capacity * sizeof(objc_stringhash_offset_t);
- }
- void byteswap(bool little_endian)
- {
- objc_stringhash_offset_t *o;
-
- o = protocolOffsets();
- for (objc_stringhash_offset_t i = 0; i < (int)capacity; i++) {
- S32(o[i]);
- }
- objc_stringhash_t::byteswap(little_endian);
- }
-
- const char *write(uint64_t base, size_t remaining,
- string_map& strings, protocol_map& protocols,
- bool verbose)
- {
- const char *err;
- err = objc_stringhash_t::write(base, remaining, strings);
- if (err) return err;
- if (size() > remaining) {
- return "selector section too small (metadata not optimized)";
- }
-
- for (uint32_t i = 0; i < capacity; i++) {
- protocolOffsets()[i] = 0;
- }
-
-
- # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
- protocol_map::const_iterator c;
- for (c = protocols.begin(); c != protocols.end(); ++c) {
- uint32_t h = getIndex(c->first);
- if (h == INDEX_NOT_FOUND) {
- return "protocol list busted (metadata not optimized)";
- }
- int64_t offset = c->second - base;
- if ((offset<<SHIFT)>>SHIFT != offset) {
- return "protocol offset too big (metadata not optimized)";
- }
- protocolOffsets()[h] = (objc_stringhash_offset_t)offset;
- }
- # undef SHIFT
-
- return NULL;
- }
- #endif
- };
- struct objc_headeropt_ro_t;
- struct objc_headeropt_rw_t;
- struct objc_clsopt_t;
// Version number of this data layout.
// NOTE(review): presumably compared against objc_opt_t::version - confirm.
enum {
    VERSION = 15
};

// Single-bit flag values.
// NOTE(review): presumably stored in objc_opt_t::flags - confirm.
enum : uint32_t {
    IsProduction              = (1 << 0),
    NoMissingWeakSuperclasses = (1 << 1),
};
- struct alignas(alignof(void*)) objc_opt_t {
- uint32_t version;
- uint32_t flags;
- int32_t selopt_offset;
- int32_t headeropt_ro_offset;
- int32_t clsopt_offset;
- int32_t protocolopt_offset;
- int32_t headeropt_rw_offset;
- const objc_selopt_t* selopt() const {
- if (selopt_offset == 0) return NULL;
- return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
- }
- objc_selopt_t* selopt() {
- if (selopt_offset == 0) return NULL;
- return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
- }
- struct objc_headeropt_ro_t* headeropt_ro() const {
- if (headeropt_ro_offset == 0) return NULL;
- return (struct objc_headeropt_ro_t *)((uint8_t *)this + headeropt_ro_offset);
- }
- struct objc_clsopt_t* clsopt() const {
- if (clsopt_offset == 0) return NULL;
- return (objc_clsopt_t *)((uint8_t *)this + clsopt_offset);
- }
- struct objc_protocolopt_t* protocolopt() const {
- if (protocolopt_offset == 0) return NULL;
- return (objc_protocolopt_t *)((uint8_t *)this + protocolopt_offset);
- }
- struct objc_headeropt_rw_t* headeropt_rw() const {
- if (headeropt_rw_offset == 0) return NULL;
- return (struct objc_headeropt_rw_t *)((uint8_t *)this + headeropt_rw_offset);
- }
- };
- STATIC_ASSERT(sizeof(objc_opt_t) % sizeof(void*) == 0);
// List of pointer-sized values, parameterised on the integer type used to
// hold one pointer. It currently has a single member, protocolClass.
template <class T>
struct objc_opt_pointerlist_tt {
    T protocolClass;
};

// Instantiation for the pointer width of the current build.
typedef objc_opt_pointerlist_tt<uintptr_t> objc_opt_pointerlist_t;
/*
 * mix64 -- mixes three 64-bit values in place.
 * The arguments must be modifiable uint64_t lvalues; each is evaluated
 * many times.
 */
#define mix64(a,b,c) \
{ \
    a -= b; a -= c; a ^= (c>>43); \
    b -= c; b -= a; b ^= (a<<9); \
    c -= a; c -= b; c ^= (b>>8); \
    a -= b; a -= c; a ^= (c>>38); \
    b -= c; b -= a; b ^= (a<<23); \
    c -= a; c -= b; c ^= (b>>5); \
    a -= b; a -= c; a ^= (c>>35); \
    b -= c; b -= a; b ^= (a<<49); \
    c -= a; c -= b; c ^= (b>>11); \
    a -= b; a -= c; a ^= (c>>12); \
    b -= c; b -= a; b ^= (a<<18); \
    c -= a; c -= b; c ^= (b>>22); \
}

/*
 * lookup8 -- hashes `length` bytes at `k` into a 64-bit value.
 *   k      - the key bytes (read only, assembled least-significant first)
 *   length - number of bytes in the key
 *   level  - seed; different values give different hashes of the same key
 */
static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level)
{
    uint64_t a = level;
    uint64_t b = level;
    uint64_t c = 0x9e3779b97f4a7c13LL;
    size_t remaining = length;

    /* Consume the key 24 bytes at a time: 8 bytes into each of a, b, c. */
    for (; remaining >= 24; remaining -= 24, k += 24) {
        for (unsigned i = 0; i < 8; i++) {
            const unsigned bits = 8 * i;
            a += (uint64_t)k[i]      << bits;
            b += (uint64_t)k[8 + i]  << bits;
            c += (uint64_t)k[16 + i] << bits;
        }
        mix64(a,b,c);
    }

    /*
     * Fold in the last 0..23 bytes. The low byte of c is reserved for the
     * total length, so tail bytes 16..22 land one byte higher in c.
     */
    c += length;
    for (size_t i = 0; i < remaining; i++) {
        const uint64_t octet = k[i];
        if (i < 8) {
            a += octet << (8 * i);
        } else if (i < 16) {
            b += octet << (8 * (i - 8));
        } else {
            c += octet << (8 * (i - 15));
        }
    }
    mix64(a,b,c);

    return c;
}
- #ifdef SELOPT_WRITE
- typedef uint64_t ub8;
- #define UB8MAXVAL 0xffffffffffffffffLL
- #define UB8BITS 64
- typedef uint32_t ub4;
- #define UB4MAXVAL 0xffffffff
- #define UB4BITS 32
- typedef uint16_t ub2;
- #define UB2MAXVAL 0xffff
- #define UB2BITS 16
- typedef uint8_t ub1;
- #define UB1MAXVAL 0xff
- #define UB1BITS 8
- #define TRUE 1
- #define FALSE 0
- #define SCRAMBLE_LEN 256 // ((ub4)1<<16) /* length of *scramble* */
- #define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */
- #define RETRY_PERFECT 4 /* number of times to try to make a perfect hash */
- struct key
- {
- ub1 *name_k;
- ub4 len_k;
- ub4 hash_k;
- ub4 a_k;
- ub4 b_k;
- struct key *nextb_k;
- };
- typedef struct key key;
- struct bstuff
- {
- ub2 val_b;
- key *list_b;
- ub4 listlen_b;
- ub4 water_b;
- };
- typedef struct bstuff bstuff;
- struct hstuff
- {
- key *key_h;
- };
- typedef struct hstuff hstuff;
- struct qstuff
- {
- bstuff *b_q;
- ub4 parent_q;
- ub2 newval_q;
- ub2 oldval_q;
- };
- typedef struct qstuff qstuff;
- static ub4 log2u(ub4 val)
- {
- ub4 i;
- for (i=0; ((ub4)1<<i) < val; ++i)
- ;
- return i;
- }
- static ub4 permute(ub4 x, ub4 nbits)
- {
- int i;
- int mask = ((ub4)1<<nbits)-1;
- int const2 = 1+nbits/2;
- int const3 = 1+nbits/3;
- int const4 = 1+nbits/4;
- int const5 = 1+nbits/5;
- for (i=0; i<20; ++i)
- {
- x = (x+(x<<const2)) & mask;
- x = (x^(x>>const3));
- x = (x+(x<<const4)) & mask;
- x = (x^(x>>const5));
- }
- return x;
- }
- static void scrambleinit(ub4 *scramble, ub4 smax)
- {
- ub4 i;
-
- for (i=0; i<SCRAMBLE_LEN; ++i)
- {
- scramble[i] = permute(i, log2u(smax));
- }
- }
- static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete)
- {
- int nocollision = TRUE;
- ub4 i;
- memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
-
- for (i = 0; i < nkeys; i++) {
- key *mykey = keys+i;
- key *otherkey;
- for (otherkey=tabb[mykey->b_k].list_b;
- otherkey;
- otherkey=otherkey->nextb_k)
- {
- if (mykey->a_k == otherkey->a_k)
- {
- nocollision = FALSE;
- if (!complete)
- return FALSE;
- }
- }
- ++tabb[mykey->b_k].listlen_b;
- mykey->nextb_k = tabb[mykey->b_k].list_b;
- tabb[mykey->b_k].list_b = mykey;
- }
-
- return nocollision;
- }
- static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt)
- {
- ub4 loga = log2u(alen);
- dispatch_apply(nkeys, DISPATCH_APPLY_AUTO, ^(size_t index) {
- ub4 i = (ub4)index;
- key *mykey = keys+i;
- ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt);
- mykey->a_k = (loga > 0) ? (ub4)(hash >> (UB8BITS-loga)) : 0;
- mykey->b_k = (blen > 1) ? (hash & (blen-1)) : 0;
- });
- }
- static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback)
- {
- ub4 hash;
- key *mykey;
- bstuff *pb;
- ub4 child;
- ub4 parent;
- ub4 stabb;
-
- for (child=tail-1; child; child=parent)
- {
- parent = tabq[child].parent_q;
- pb = tabq[parent].b_q;
-
- stabb = scramble[pb->val_b];
- for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
- {
- hash = mykey->a_k^stabb;
- if (mykey == tabh[hash].key_h)
- {
- tabh[hash].key_h = (key *)0;
- }
- }
-
- pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
-
- stabb = scramble[pb->val_b];
- for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
- {
- hash = mykey->a_k^stabb;
- if (rollback)
- {
- if (parent == 0) continue;
- }
- else if (tabh[hash].key_h)
- {
-
- apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
- return FALSE;
- }
- tabh[hash].key_h = mykey;
- }
- }
- return TRUE;
- }
- static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys,
- ub4 highwater)
- {
- ub4 q;
- ub4 tail;
- ub4 limit=UB1MAXVAL+1;
- ub4 highhash = smax;
-
- tabq[0].b_q = item;
- tail = 1;
-
- for (q=0; q<tail; ++q)
- {
- bstuff *myb = tabq[q].b_q;
- ub4 i;
- if (q == 1)
- break;
- for (i=0; i<limit; ++i)
- {
- bstuff *childb = (bstuff *)0;
- key *mykey;
- for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
- {
- key *childkey;
- ub4 hash = mykey->a_k^scramble[i];
- if (hash >= highhash) break;
- childkey = tabh[hash].key_h;
- if (childkey)
- {
- bstuff *hitb = &tabb[childkey->b_k];
- if (childb)
- {
- if (childb != hitb) break;
- }
- else
- {
- childb = hitb;
- if (childb->water_b == highwater) break;
- }
- }
- }
- if (mykey) continue;
-
- if (childb) childb->water_b = highwater;
- tabq[tail].b_q = childb;
- tabq[tail].newval_q = i;
- tabq[tail].oldval_q = myb->val_b;
- tabq[tail].parent_q = q;
- ++tail;
- if (!childb)
- {
-
- if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
- return TRUE;
- --tail;
- }
- }
- }
- return FALSE;
- }
- static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys)
- {
- ub4 maxkeys;
- ub4 i, j;
- #if SELOPT_DEBUG
- fprintf(stderr, " blen %d smax %d nkeys %d\n", blen, smax, nkeys);
- #endif
-
- memset((void *)tabh, 0, sizeof(hstuff)*smax);
- memset((void *)tabq, 0, sizeof(qstuff)*(blen+1));
- for (maxkeys=0,i=0; i<blen; ++i)
- if (tabb[i].listlen_b > maxkeys)
- maxkeys = tabb[i].listlen_b;
-
- for (j=maxkeys; j>0; --j)
- for (i=0; i<blen; ++i)
- if (tabb[i].listlen_b == j)
- if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys,
- i+1))
- {
- return FALSE;
- }
-
- return TRUE;
- }
- static void initalen(ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys)
- {
-
- *alen = smax;
- *blen = ((nkeys <= smax*0.6) ? smax/16 :
- (nkeys <= smax*0.8) ? smax/8 : smax/4);
-
- if (*alen < 1) *alen = 1;
- if (*blen < 1) *blen = 1;
- #if SELOPT_DEBUG
- fprintf(stderr, "alen %d blen %d smax %d nkeys %d\n", *alen, *blen, smax, nkeys);
- #endif
- }
- static int findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub8 *salt,
- ub4 *scramble, ub4 smax, key *keys, ub4 nkeys)
- {
- ub4 bad_initkey;
- ub4 bad_perfect;
- ub4 si;
- ub4 maxalen;
- hstuff *tabh;
- qstuff *tabq;
-
- initalen(alen, blen, smax, nkeys);
- scrambleinit(scramble, smax);
- maxalen = smax;
-
- *tabb = new bstuff[*blen];
- tabq = new qstuff[*blen+1];
- tabh = new hstuff[smax];
-
- *salt = 0;
- bad_initkey = 0;
- bad_perfect = 0;
- for (si=1; ; ++si)
- {
- ub4 rslinit;
-
- *salt = si * 0x9e3779b97f4a7c13LL;
- initnorm(keys, nkeys, *alen, *blen, smax, *salt);
- rslinit = inittab(*tabb, *blen, keys, nkeys, FALSE);
- if (rslinit == 0)
- {
-
- if (++bad_initkey >= RETRY_INITKEY)
- {
-
- if (*alen < maxalen)
- {
- *alen *= 2;
- }
- else if (*blen < smax)
- {
- *blen *= 2;
- delete[] tabq;
- delete[] *tabb;
- *tabb = new bstuff[*blen];
- tabq = new qstuff[*blen+1];
- }
- bad_initkey = 0;
- bad_perfect = 0;
- }
- continue;
- }
-
- if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys))
- {
- if (++bad_perfect >= RETRY_PERFECT)
- {
- if (*blen < smax)
- {
- *blen *= 2;
- delete[] *tabb;
- delete[] tabq;
- *tabb = new bstuff[*blen];
- tabq = new qstuff[*blen+1];
- --si;
- }
- else
- {
- return 0;
- }
- bad_perfect = 0;
- }
- continue;
- }
-
- break;
- }
-
- delete[] tabh;
- delete[] tabq;
- return 1;
- }
- static void getkeys(key **keys, ub4 *nkeys, const string_map& strings)
- {
- key *buf = new key[strings.size()];
- size_t i;
- string_map::const_iterator s;
- for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) {
- key *mykey = buf+i;
- mykey->name_k = (ub1 *)s->first;
- mykey->len_k = (ub4)strlen(s->first);
- }
- *keys = buf;
- *nkeys = (ub4)strings.size();
- }
- static perfect_hash
- make_perfect(const string_map& strings)
- {
- ub4 nkeys;
- key *keys;
- bstuff *tab;
- ub4 smax;
- ub4 alen;
- ub4 blen;
- ub8 salt;
- ub4 scramble[SCRAMBLE_LEN];
- int ok;
- uint32_t i;
- perfect_hash result;
-
- getkeys(&keys, &nkeys, strings);
-
- smax = ((ub4)1<<log2u(nkeys));
- ok = findhash(&tab, &alen, &blen, &salt,
- scramble, smax, keys, nkeys);
- if (!ok) {
- smax = 2 * ((ub4)1<<log2u(nkeys));
- ok = findhash(&tab, &alen, &blen, &salt,
- scramble, smax, keys, nkeys);
- }
- if (!ok) {
- bzero(&result, sizeof(result));
- } else {
-
- result.capacity = smax;
- result.occupied = nkeys;
- result.shift = UB8BITS - log2u(alen);
- result.mask = blen - 1;
- result.salt = salt;
-
- result.tab = new uint8_t[blen];
- for (i = 0; i < blen; i++) {
- result.tab[i] = tab[i].val_b;
- }
- for (i = 0; i < 256; i++) {
- result.scramble[i] = scramble[i];
- }
- }
- delete[] keys;
- delete[] tab;
- return result;
- }
- #endif
- };
- #undef S32
- #undef S64
- #endif
|