<p>摘要:[Lua_Trace] 1. Lua数据结构 - TString</p>
                <br />
                <p>
此源码分析的版本为 : <strong>Lua 5.3.1</strong></p>

2. TString : 字符串结构


//lobject.h

/*
Header for string value; string bytes follow the end of this structure
(aligned according to ‘UTString’; see next).
/
typedef struct TString {
CommonHeader;
lu_byte extra; /
reserved words for short strings; “has hash” for longs /
lu_byte shrlen; /
length for short strings /
unsigned int hash;
union {
size_t lnglen; /
length for long strings */
struct TString hnext; / linked list for hash table */
} u;
} TString;

TString结构由上而下的元素共有:

CommonHeader : GCObject的共有定义。

extra : 短字符串(LUA_TSHRSTR)下,非0之数值表示GC不回收的保留字,如下luaX_tokens定义了Lua所有的保留字。


//llex.c

/*
** Printable names of all tokens. The reserved words come first (their
** 1-based position is what luaX_init stores in 'extra'), followed by the
** multi-character operators and the pseudo-tokens shown in messages.
*/
/* ORDER RESERVED */
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "//", "..", "...", "==", ">=", "<=", "~=",
    "<<", ">>", "::", "<eof>",
    "<number>", "<integer>", "<name>", "<string>"
};

/*
** Lexer initialization: creates the LUA_ENV name and one string per
** reserved word, fixes each of them so they are never collected, and tags
** every reserved word with its 1-based index in 'extra' (so a non-zero
** 'extra' on a short string identifies a reserved word).
*/
void luaX_init (lua_State *L) {
  int i;
  TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
  luaC_fix(L, obj2gco(e));  /* never collect this name */
  for (i=0; i<NUM_RESERVED; i++) {
    TString *ts = luaS_new(L, luaX_tokens[i]);
    luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
    ts->extra = cast_byte(i+1);  /* reserved word */
  }
}

extra : 长字符串(LUA_TLNGSTR)下则表示是否已经计算过hash值(即注释中的 "has hash"),已计算则设定为1。

shrlen : 字符串长度 (LUA_TSHRSTR使用)。

hash : 杂凑值,由杂凑算法产生。

u : LUA_TLNGSTR下表示为字符串长度(lnglen),LUA_TSHRSTR则表示为hash table的链接(*hnext)。

创建TString时,Lua会在TString头部(32位平台上为16 bytes,64位平台上通常为24 bytes)之后紧接着开一块空间存储字符串数据,这样的行为可由如下源码中察觉。


//lstring.c

creates a new string object
*/
static TString *createstrobj (lua_State *L, const char *str, size_t l,
int tag, unsigned int h) {
TString *ts;
GCObject o;
size_t totalsize; /
total size of TString object /
totalsize = sizelstring(l);
o = luaC_newobj(L, tag, totalsize);
ts = gco2ts(o);
ts->hash = h;
ts->extra = 0;
memcpy(getaddrstr(ts), str, l * sizeof(char));
getaddrstr(ts)[l] = ‘’; /
ending 0 */
return ts;
}

换句话说,欲取得字符串的方法可透过如下方式:


TString* p;
char* str=(char*)(p+1); //TString的字符串指针

Lua针对短字符串(LUA_TSHRSTR : 长度不超过LUAI_MAXSHORTLEN,即40)采取了节省内存与优化性能的措施:内容相同的多个短字符串共用同一份TString对象。实现方式是先由字符串内容计算出hash值,再到全域hash表(g->strt)对应的桶中沿hnext链表逐一比较长度与内容;如果找到相同的字符串,就直接返回该对象(若它已被标记为死亡但尚未被回收,则先将其复活),反之,Lua会创建一个新的TString并放到hash表中对应的位置(表中字符串数量达到表大小时会先把hash表扩大一倍)。


//lstring.c

/*
** new string (with explicit length)
**
** Strings of at most LUAI_MAXSHORTLEN bytes are interned through
** 'internshrstr'. Longer ones always get a fresh LUA_TLNGSTR object whose
** 'hash' field initially holds the global seed and whose length is stored
** in 'u.lnglen'. Calls 'luaM_toobig' when the string plus its header would
** exceed MAX_SIZE.
*/
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
  if (l <= LUAI_MAXSHORTLEN)  /* short string? */
    return internshrstr(L, str, l);
  else {
    TString *ts;
    if (l + 1 > (MAX_SIZE - sizeof(TString))/sizeof(char))
      luaM_toobig(L);
    ts = createstrobj(L, str, l, LUA_TLNGSTR, G(L)->seed);
    ts->u.lnglen = l;
    return ts;
  }
}

/*
** checks whether short string exists and reuses it or creates a new one
**
** Hashes 'str', then walks the 'hnext' chain of the matching bucket in the
** global string table, comparing length and bytes. A match is returned as
** is (resurrected first if it is dead but not yet collected). Otherwise a
** new LUA_TSHRSTR object is created and pushed at the head of the bucket;
** the table is doubled beforehand when 'nuse' has reached 'size'.
*/
static TString *internshrstr (lua_State *L, const char *str, size_t l) {
  TString *ts;
  global_State *g = G(L);
  unsigned int h = luaS_hash(str, l, g->seed);
  TString **list = &g->strt.hash[lmod(h, g->strt.size)];
  for (ts = *list; ts != NULL; ts = ts->u.hnext) {
    if (l == ts->shrlen &&
        (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
      /* found! */
      if (isdead(g, ts))  /* dead (but not collected yet)? */
        changewhite(ts);  /* resurrect it */
      return ts;
    }
  }
  if (g->strt.nuse >= g->strt.size && g->strt.size <= MAX_INT/2) {
    luaS_resize(L, g->strt.size * 2);
    list = &g->strt.hash[lmod(h, g->strt.size)];  /* recompute with new size */
  }
  ts = createstrobj(L, str, l, LUA_TSHRSTR, h);
  ts->shrlen = cast_byte(l);
  ts->u.hnext = *list;  /* link new string at the head of its bucket */
  *list = ts;
  g->strt.nuse++;
  return ts;
}

如有任何错误请指正,后续我将尽可能地再补充细节。