|
XCIN Mail-list
|
| Indexed By Date: [Previous] [Next] | Indexed By Thread: [Previous] [Next] |
| Subject: | Re: about xcin and addtsi.... |
| From: | Kuang-che Wu <kcwu@camel.ck.tp.edu.tw> |
| Organization: | Taipei Chien-kuo Senior High School |
| Date: | 7 Dec 2000 20:46:47 GMT |
| To: | xcin@tlug.sinica.edu.tw |
| Reply-To: | xcin@linux.org.tw |
thhsieh@linux.org.tw 提到:
> : 1.對於 match 不到詞的字, 希望能夠先上字頻比較高的字
> : 這字頻也先不用太講究, 可以先拿一般注音輸入法第一選字位的字來應急
之前我說錯了, 現在就是先上字頻比較高的字,
應該是先上 "讀某音, 字頻最高的字"
我想到的作法是, 根據詞庫以及詞庫中填的注音
統計每個字各種唸法出現的頻率, 用這個頻率來上字
一個小問題是, 這功能是不是要放到 libtabe 中, 而不限於只在 bims 中
因為這看起來滿基本、常用的
> : 2.希望能夠手動強迫斷詞
> : 像自然注音那樣, 可以在輸入法認定詞的中間,
> : 按 tab 強迫把該詞的字視為不同一詞
> 這兩個都需要先修改 libtabe/libbims, 然後再配合修改 xcin,工程比較大,
> 我們會慢慢來弄。
希望沒有人正在寫斷詞的 code....
我稍微修改 bims, 使得可以手動強迫斷詞
除了加上 tsiboundary[] 記錄斷詞位置之外,
我還改了
bimsContextDP() 常 realloc, 改成 malloc 一次
一點點的改進效率(真的是一點點, 應該沒什麼差別....)
現在可以強迫斷詞了, 但剛寫好沒作很多的測試,
因此最好再檢查一下....雖然我猜應該沒什麼大問題
還有, 因為我太久沒用自然輸入法了, 因此不太記得 tab 的行為了...
印象中好像 tab 多按幾次會把詞接起來??
我現在的作法是, 按 tab 會切換該位置是否要把詞切斷
沒有其他特殊的功能
以下是 patch
--- bims.c.old Fri Dec 8 04:00:58 2000
+++ bims.c Fri Dec 8 04:01:12 2000
@@ -198,6 +198,10 @@
free(fbc->pindown);
}
fbc->pindown = (ZhiCode *)NULL;
+ if (fbc->tsiboundary) {
+ free(fbc->tsiboundary);
+ }
+ fbc->tsiboundary = (int*)NULL;
fbc->state = BC_STATE_EDITING;
fbc->bcid = 0;
memset(&(fbc->zc), 0, sizeof(fbc->zc));
@@ -439,12 +443,13 @@
struct YinSegInfo *ysinfo = (struct YinSegInfo *)NULL;
int num_ysinfo = 0;
struct smart_com *comb = (struct smart_com *)NULL;
- int i, j, k, rval;
+ int i, j, k, z, rval;
int yinhead, len, ncomb = 0;
int ncand, *cand;
int *tmpcand, tmpncand;
struct TsiYinInfo ty;
struct TsiInfo tsi;
+ int maxcount;
int max_int, index;
double max_double;
#define TMP_BUFFER 80 /* this should be far enough for this implementaion */
@@ -483,15 +488,17 @@
ysinfo[num_ysinfo].yindata = (Yin *)malloc(sizeof(Yin)*2);
memcpy(ysinfo[num_ysinfo].yindata, bc->yin+yinhead, sizeof(Yin)*2);
num_ysinfo++;
- /* done duplicate a two-character word */
- memset(&ty, 0, sizeof(ty));
- ty.yinlen = 2;
- ty.yin = ysinfo[num_ysinfo-1].yindata;
- rval = ydb->Get(ydb, &ty);
- if (!rval) {
- /* tsiyin exists, verify if it has pindown character */
- if (!bimsVerifyPindown(bc, &ty, yinhead, -1)) {
- break;
+ if(!bc->tsiboundary[yinhead+1]) {
+ /* done duplicate a two-character word */
+ memset(&ty, 0, sizeof(ty));
+ ty.yinlen = 2;
+ ty.yin = ysinfo[num_ysinfo-1].yindata;
+ rval = ydb->Get(ydb, &ty);
+ if (!rval) {
+ /* tsiyin exists, verify if it has pindown character */
+ if (!bimsVerifyPindown(bc, &ty, yinhead, -1)) {
+ break;
+ }
}
}
/* no such tsiyin, handle word by word */
@@ -506,6 +513,14 @@
break;
}
for (i = len-yinhead; i > 0; i--) {
+ for(z = 1; z < i; z++) {
+ if(bc->tsiboundary[yinhead+z]) {
+ break;
+ }
+ }
+ if(z != i) {
+ continue;
+ }
memset(&ty, 0, sizeof(ty));
ty.yinlen = i;
memcpy(yin, bc->yin+yinhead, sizeof(Yin)*i);
@@ -519,6 +534,14 @@
}
for (j = len-yinhead-i; j >= 0; j--) {
if (j > 0) {
+ for(z = 1; z < j; z++) {
+ if(bc->tsiboundary[yinhead+i+z]) {
+ break;
+ }
+ }
+ if(z != j) {
+ continue;
+ }
memset(&ty, 0, sizeof(ty));
ty.yinlen = j;
memcpy(yin, bc->yin+yinhead+i, sizeof(Yin)*j);
@@ -532,7 +555,18 @@
}
}
for (k = len-yinhead-i-j; k >= 0; k--) {
+ if (k > 0 && j == 0) {
+ continue;
+ }
if (k > 0) {
+ for(z = 1; z < k; z++) {
+ if(bc->tsiboundary[yinhead+i+j+z]) {
+ break;
+ }
+ }
+ if(z != k) {
+ continue;
+ }
memset(&ty, 0, sizeof(ty));
ty.yinlen = k;
memcpy(yin, bc->yin+yinhead+i+j, sizeof(Yin)*k);
@@ -545,9 +579,6 @@
continue;
}
}
- if (k > 0 && j == 0) {
- continue;
- }
comb = (struct smart_com *)
realloc(comb, sizeof(struct smart_com)*(ncomb+1));
comb[ncomb].s1 = yinhead;
@@ -565,17 +596,20 @@
/* rule 1: largest sum of three-tsi */
max_int = 0;
index = 0;
+ maxcount = 0;
for (i = 0; i < ncomb; i++) {
if (comb[i].len > max_int) {
index = i;
max_int = comb[i].len;
+ maxcount = 1;
+ } else if(comb[i].len == max_int) {
+ maxcount++;
}
}
ncand = 0;
- cand = (int *)NULL;
+ cand = (int *)malloc(sizeof(int)*maxcount);
for (i = 0; i < ncomb; i++) {
if (comb[i].len == max_int) {
- cand = (int *)realloc(cand, sizeof(int)*(ncand+1));
cand[ncand] = i;
ncand++;
}
@@ -588,6 +622,7 @@
else { /* ambiguity */
/* rule 2: largest average word length */
max_double = 0;
+ maxcount = 0;
for (i = 0; i < ncand; i++) {
index = cand[i];
comb[index].avg_word_len = 0;
@@ -611,15 +646,17 @@
comb[index].avg_word_len /= j;
if (comb[index].avg_word_len > max_double) {
max_double = comb[index].avg_word_len;
+ maxcount = 1;
+ } else if(comb[index].avg_word_len == max_double) {
+ maxcount++;
}
}
tmpncand = 0;
- tmpcand = (int *)NULL;
+ tmpcand = (int *)malloc(sizeof(int)*maxcount);
for (i = 0; i < ncand; i++) {
index = cand[i];
if (comb[index].avg_word_len == max_double) {
- tmpcand = (int *)realloc(tmpcand, sizeof(int)*(tmpncand+1));
tmpcand[tmpncand] = index;
tmpncand++;
}
@@ -637,6 +674,7 @@
else { /* ambiguity */
/* rule 3: smallest variance of word length */
max_double = 1000; /* this is misleading */
+ maxcount = 0;
for (i = 0; i < ncand; i++) {
index = cand[i];
comb[index].smallest_var = 0;
@@ -654,15 +692,17 @@
comb[index].smallest_var /= 3;
if (comb[index].smallest_var < max_double) {
max_double = comb[index].smallest_var;
+ maxcount = 1;
+ } else if(comb[index].smallest_var == max_double) {
+ maxcount++;
}
}
tmpncand = 0;
- tmpcand = (int *)NULL;
+ tmpcand = (int *)malloc(sizeof(int)*maxcount);
for (i = 0; i < ncand; i++) {
index = cand[i];
if (comb[index].smallest_var == max_double) {
- tmpcand = (int *)realloc(tmpcand, sizeof(int)*(tmpncand+1));
tmpcand[tmpncand] = index;
tmpncand++;
}
@@ -681,6 +721,7 @@
int max_ref;
/* rule 4: largest sum of tsi ref count */
max_double = 0;
+ maxcount = 0;
for (i = 0; i < ncand; i++) {
index = cand[i];
comb[index].largest_sum = 0;
@@ -750,15 +791,17 @@
if (comb[index].largest_sum > max_double) {
max_double = comb[index].largest_sum;
+ maxcount = 1;
+ } else if(comb[index].largest_sum == max_double) {
+ maxcount++;
}
}
tmpncand = 0;
- tmpcand = (int *)NULL;
+ tmpcand = (int *)malloc(sizeof(int)*maxcount);
for (i = 0; i < ncand; i++) {
index = cand[i];
if (comb[index].largest_sum == max_double) {
- tmpcand = (int *)realloc(tmpcand, sizeof(int)*(tmpncand+1));
tmpcand[tmpncand] = index;
tmpncand++;
}
@@ -926,6 +969,7 @@
* XK_Right move the internal cursor one zhi right
* XK_Backspace
* XK_Delete delete the zhi in front of the internal cursor
+ * XK_Tab switch the tsi boundary
* XK_Return does nothing so far, client may request for string
* others depends on the key mapping the client uses
*
@@ -1006,6 +1050,9 @@
memmove(bc->pindown+(bc->yinpos-1),
bc->pindown+(bc->yinpos),
sizeof(ZhiCode)*(bc->yinlen-(bc->yinpos-1)));
+ memmove(bc->tsiboundary+(bc->yinpos-1),
+ bc->tsiboundary+(bc->yinpos),
+ sizeof(int)*(bc->yinlen-(bc->yinpos-1)));
}
else { /* the last character */
bc->internal_text[(bc->yinlen-1)*2] = (unsigned char)NULL;
@@ -1017,6 +1064,14 @@
}
}
return(BC_VAL_IGNORE);
+ case XK_Tab:
+ if(strlen((char *)bc->zc.string) == 0 && bc->yinlen >0 &&
+ bc->yinlen!=bc->yinpos) {
+ bc->tsiboundary[bc->yinpos]=!bc->tsiboundary[bc->yinpos];
+ bimsContextSmartEdit(bc);
+ return(BC_VAL_ABSORB);
+ }
+ return(BC_VAL_IGNORE);
case XK_Return:
return(BC_VAL_ABSORB);
default:
@@ -1091,6 +1146,11 @@
memmove(bc->pindown+(bc->yinpos+1), bc->pindown+(bc->yinpos),
sizeof(ZhiCode)*(bc->yinlen-bc->yinpos));
bc->pindown[bc->yinpos] = 0;
+ bc->tsiboundary = (int *)realloc(bc->tsiboundary,
+ sizeof(int)*(bc->yinlen+1));
+ memmove(bc->tsiboundary+(bc->yinpos+1), bc->tsiboundary+(bc->yinpos),
+ sizeof(int)*(bc->yinlen-bc->yinpos));
+ bc->tsiboundary[bc->yinpos] = 0;
bc->yinlen++;
bc->yinpos++;
bimsZuYinContextClear(&(bc->zc));
@@ -1311,9 +1371,14 @@
for (;*str;) {
bc->pindown[i] =
(*str)*256 + *(str+1);
+ bc->tsiboundary[i] = 0;
i++;
str += 2;
}
+ if(i != bc->yinlen)
+ bc->tsiboundary[i] = 1;
+ if(bc->yinpos != 0)
+ bc->tsiboundary[bc->yinpos] = 1;
bimsContextSmartEdit(bc);
return(0);
@@ -1369,6 +1434,8 @@
memmove(bc->internal_text, bc->internal_text+newlen*2,
sizeof(unsigned char)*((bc->yinlen-newlen)*2+1));
memmove(bc->pindown, bc->pindown+len, sizeof(ZhiCode)*(bc->yinlen-newlen));
+ memmove(bc->tsiboundary, bc->tsiboundary+len,
+ sizeof(int)*(bc->yinlen-newlen));
bc->yinlen -= newlen;
bimsContextSmartEdit(bc);
--- bims.h.old Fri Dec 8 04:01:06 2000
+++ bims.h Fri Dec 8 04:01:12 2000
@@ -78,6 +78,8 @@
unsigned char *internal_text; /* text: internal text */
ZhiCode *pindown; /* flag: indicating that the Zhi
is pinned down */
+ int *tsiboundary; /* flag: indicating that the Tsi
+ boundary */
int state; /* editing or zhi selection mode */
unsigned long int bcid; /* bimsContext Identifier */
int keymap; /* the type of keymap it uses */
To Unsubscribe: send mail to majordomo@linux.org.tw
with "unsubscribe xcin" in the body of the message
| Indexed By Date | Previous: |
[注音填補]4001-4500 From: ACES.bbs@openbazaar.net (小黃) |
|---|---|---|
| Next: |
Re: about xcin and addtsi.... From: Chih-Hao Tsai <hao520@yahoo.com> |
|
| Indexed By Thread | Previous: |
[注音填補]4001-4500 From: ACES.bbs@openbazaar.net (小黃) |
| Next: |
Re: about xcin and addtsi.... From: Chih-Hao Tsai <hao520@yahoo.com> |