00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #include "../stdafx.h"
00013 #include "../core/alloc_func.hpp"
00014 #include "../core/endian_func.hpp"
00015 #include "../core/math_func.hpp"
00016 #include "../core/mem_func.hpp"
00017 #include "../string_func.h"
00018 #include "../strings_type.h"
00019 #include "../language.h"
00020 #include "../table/control_codes.h"
00021
00022 #include <stdarg.h>
00023
00024 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00025 #include <unistd.h>
00026 #include <sys/stat.h>
00027 #endif
00028
00029 #if defined WIN32 || defined __WATCOMC__
00030 #include <direct.h>
00031 #endif
00032
00033 #ifdef __MORPHOS__
00034 #ifdef stderr
00035 #undef stderr
00036 #endif
00037 #define stderr stdout
00038 #endif
00039
00040 #include "../table/strgen_tables.h"
00041
00042
00043
/** One case-specific variant of a translated string; variants are chained in a singly linked list. */
struct Case {
	int caseidx;  ///< Case number, as returned by ResolveCaseName().
	char *string; ///< Heap copy of the text for this case.
	Case *next;   ///< Next variant of the same string, or NULL at the end of the list.
};
00049
static bool _masterlang;                     ///< Read by WriteLangfile(): when set, every string counts as translated.
static bool _translated;                     ///< Set per string by WriteLangfile(); makes a plural-count mismatch fatal in EmitPlural().
static bool _translation;                    ///< Set by ParseFile(); CheckCommandsMatch() only validates while this is true.
static const char *_file = "(unknown file)"; ///< File name printed in diagnostics.
static FILE *_output_file = NULL;            ///< Output file currently open; error() closes it before exiting.
static const char *_output_filename = NULL;  ///< Name of the open output file; error() unlinks it.
static int _cur_line;                        ///< Line number printed in diagnostics.
static int _errors;                          ///< Number of strgen_error() calls so far.
static int _warnings;                        ///< Number of strgen_warning() calls so far.
static int _show_todo;                       ///< Bit mask: bit 0 prefixes untranslated strings with "<TODO> ", bit 1 warns about them.

static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100; ///< Size of the buffers receiving a command's parameter text.
00060
00061 struct LangString {
00062 char *name;
00063 char *english;
00064 char *translated;
00065 uint16 hash_next;
00066 uint16 index;
00067 int line;
00068 Case *translated_case;
00069 };
00070
00071 static LangString *_strings[65536];
00072 static LanguagePackHeader _lang;
00073
00074
00075 #define HASH_SIZE 32767
00076 static uint16 _hash_head[HASH_SIZE];
00077
00078 static byte _put_buf[4096];
00079 static uint _put_pos;
00080 static int _next_string_id;
00081
00082 static uint32 _hash;
00083
00084 static const char *_cur_ident;
00085
00086 struct CmdPair {
00087 const CmdStruct *a;
00088 const char *v;
00089 };
00090
00091 struct ParsedCommandStruct {
00092 uint np;
00093 CmdPair pairs[32];
00094 const CmdStruct *cmd[32];
00095 };
00096
00097
00098 static ParsedCommandStruct _cur_pcs;
00099 static int _cur_argidx;
00100
00101 static uint HashStr(const char *s)
00102 {
00103 uint hash = 0;
00104 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00105 return hash % HASH_SIZE;
00106 }
00107
00108 static void HashAdd(const char *s, LangString *ls)
00109 {
00110 uint hash = HashStr(s);
00111 ls->hash_next = _hash_head[hash];
00112 _hash_head[hash] = ls->index + 1;
00113 }
00114
00115 static LangString *HashFind(const char *s)
00116 {
00117 int idx = _hash_head[HashStr(s)];
00118
00119 while (--idx >= 0) {
00120 LangString *ls = _strings[idx];
00121
00122 if (strcmp(ls->name, s) == 0) return ls;
00123 idx = ls->hash_next;
00124 }
00125 return NULL;
00126 }
00127
/* Layout of a diagnostic line; the arguments are file name, line number and message text. */
#ifndef _MSC_VER
# define LINE_NUM_FMT(s) "%s:%d: " s ": %s\n"
#else
# define LINE_NUM_FMT(s) "%s (%d): warning: %s (" s ")\n"
#endif
00133
00134 static void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
00135
00136 static void CDECL strgen_warning(const char *s, ...)
00137 {
00138 char buf[1024];
00139 va_list va;
00140 va_start(va, s);
00141 vsnprintf(buf, lengthof(buf), s, va);
00142 va_end(va);
00143 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, buf);
00144 _warnings++;
00145 }
00146
00147 static void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
00148
00149 static void CDECL strgen_error(const char *s, ...)
00150 {
00151 char buf[1024];
00152 va_list va;
00153 va_start(va, s);
00154 vsnprintf(buf, lengthof(buf), s, va);
00155 va_end(va);
00156 fprintf(stderr, LINE_NUM_FMT("error"), _file, _cur_line, buf);
00157 _errors++;
00158 }
00159
00160 void NORETURN CDECL error(const char *s, ...)
00161 {
00162 char buf[1024];
00163 va_list va;
00164 va_start(va, s);
00165 vsnprintf(buf, lengthof(buf), s, va);
00166 va_end(va);
00167 fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
00168 #ifdef _MSC_VER
00169 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
00170 #endif
00171
00172 if (_output_file != NULL) {
00173 fclose(_output_file);
00174 unlink(_output_filename);
00175 }
00176 exit(1);
00177 }
00178
00179 static void PutByte(byte c)
00180 {
00181 if (_put_pos >= lengthof(_put_buf)) error("Put buffer too small");
00182 _put_buf[_put_pos++] = c;
00183 }
00184
00185
00186 static void PutUtf8(uint32 value)
00187 {
00188 if (value < 0x80) {
00189 PutByte(value);
00190 } else if (value < 0x800) {
00191 PutByte(0xC0 + GB(value, 6, 5));
00192 PutByte(0x80 + GB(value, 0, 6));
00193 } else if (value < 0x10000) {
00194 PutByte(0xE0 + GB(value, 12, 4));
00195 PutByte(0x80 + GB(value, 6, 6));
00196 PutByte(0x80 + GB(value, 0, 6));
00197 } else if (value < 0x110000) {
00198 PutByte(0xF0 + GB(value, 18, 3));
00199 PutByte(0x80 + GB(value, 12, 6));
00200 PutByte(0x80 + GB(value, 6, 6));
00201 PutByte(0x80 + GB(value, 0, 6));
00202 } else {
00203 strgen_warning("Invalid unicode value U+0x%X", value);
00204 }
00205 }
00206
00207
00208 size_t Utf8Validate(const char *s)
00209 {
00210 uint32 c;
00211
00212 if (!HasBit(s[0], 7)) {
00213
00214 return 1;
00215 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00216
00217 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00218 if (c >= 0x80) return 2;
00219 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00220
00221 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00222 if (c >= 0x800) return 3;
00223 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00224
00225 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00226 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00227 }
00228
00229 return 0;
00230 }
00231
00232
00233 static void EmitSingleChar(char *buf, int value)
00234 {
00235 if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00236 PutUtf8(value);
00237 }
00238
00239
00240
00241
00242
00243
00244
00245
/**
 * Parse an optionally relative number, optionally followed by ":<offset>".
 *
 * A number with a leading '+' or a negative number is added to *value;
 * any other number replaces *value.
 *
 * @param buf    In: text to parse. Out: first character after the parsed part
 *               (unchanged when the function returns false).
 * @param value  In: base for relative numbers. Out: the resulting number.
 * @param offset Receives the number after ':'; pass NULL to not accept an offset.
 * @return true when a number was parsed, false when no number (or no offset after ':') was found.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	char *end;

	while (*cursor == ' ' || *cursor == '\t') cursor++;

	bool relative = (*cursor == '+');
	if (relative) cursor++;

	int number = strtol(cursor, &end, 0);
	if (end == cursor) return false;

	if (relative || number < 0) {
		*value += number;
	} else {
		*value = number;
	}

	if (offset != NULL && *end == ':') {
		/* An offset follows directly after the colon. */
		cursor = end + 1;
		*offset = strtol(cursor, &end, 0);
		if (end == cursor) return false;
	}

	*buf = end;
	return true;
}
00273
00274
/**
 * Cut the next word out of a buffer.
 *
 * A word is either enclosed in double quotes or runs up to the next space or tab.
 * The character ending the word is overwritten with '\0'.
 *
 * @param buf In: text to parse. Out: first character after the word
 *            (unchanged when the function returns NULL).
 * @return Start of the word inside the buffer, or NULL when only whitespace was left.
 */
char *ParseWord(char **buf)
{
	char *s = *buf;

	while (*s == ' ' || *s == '\t') s++;
	if (*s == '\0') return NULL;

	const bool quoted = (*s == '"');
	if (quoted) s++;

	char *word = s;
	while (*s != '\0') {
		const bool at_end = quoted ? (*s == '"') : (*s == ' ' || *s == '\t');
		if (at_end) {
			*s++ = '\0';
			break;
		}
		s++;
	}

	*buf = s;
	return word;
}
00308
00309
00310 static int TranslateArgumentIdx(int arg, int offset = 0);
00311
00312 static void EmitWordList(const char * const *words, uint nw)
00313 {
00314 PutByte(nw);
00315 for (uint i = 0; i < nw; i++) PutByte(strlen(words[i]) + 1);
00316 for (uint i = 0; i < nw; i++) {
00317 for (uint j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00318 PutByte(0);
00319 }
00320 }
00321
00322 static void EmitPlural(char *buf, int value)
00323 {
00324 int argidx = _cur_argidx;
00325 int offset = 0;
00326 const char *words[5];
00327 int nw = 0;
00328
00329
00330 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00331
00332
00333 for (nw = 0; nw < 5; nw++) {
00334 words[nw] = ParseWord(&buf);
00335 if (words[nw] == NULL) break;
00336 }
00337
00338 if (nw == 0) {
00339 error("%s: No plural words", _cur_ident);
00340 }
00341
00342 if (_plural_forms[_lang.plural_form].plural_count != nw) {
00343 if (_translated) {
00344 error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00345 _plural_forms[_lang.plural_form].plural_count, nw);
00346 } else {
00347 if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00348 if (nw > _plural_forms[_lang.plural_form].plural_count) {
00349 nw = _plural_forms[_lang.plural_form].plural_count;
00350 } else {
00351 for (; nw < _plural_forms[_lang.plural_form].plural_count; nw++) {
00352 words[nw] = words[nw - 1];
00353 }
00354 }
00355 }
00356 }
00357
00358 PutUtf8(SCC_PLURAL_LIST);
00359 PutByte(_lang.plural_form);
00360 PutByte(TranslateArgumentIdx(argidx, offset));
00361 EmitWordList(words, nw);
00362 }
00363
00364
00365 static void EmitGender(char *buf, int value)
00366 {
00367 int argidx = _cur_argidx;
00368 int offset = 0;
00369 uint nw;
00370
00371 if (buf[0] == '=') {
00372 buf++;
00373
00374
00375 nw = _lang.GetGenderIndex(buf);
00376 if (nw >= MAX_NUM_GENDERS) error("G argument '%s' invalid", buf);
00377
00378
00379 PutUtf8(SCC_GENDER_INDEX);
00380 PutByte(nw);
00381 } else {
00382 const char *words[MAX_NUM_GENDERS];
00383
00384
00385
00386 if (!ParseRelNum(&buf, &argidx, &offset)) {}
00387
00388 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
00389 if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
00390 error("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
00391 }
00392
00393 for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
00394 words[nw] = ParseWord(&buf);
00395 if (words[nw] == NULL) break;
00396 }
00397 if (nw != _lang.num_genders) error("Bad # of arguments for gender command");
00398
00399 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
00400 PutUtf8(SCC_GENDER_LIST);
00401 PutByte(TranslateArgumentIdx(argidx, offset));
00402 EmitWordList(words, nw);
00403 }
00404 }
00405
00406 static const CmdStruct *FindCmd(const char *s, int len)
00407 {
00408 for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00409 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00410 }
00411 return NULL;
00412 }
00413
00414 static uint ResolveCaseName(const char *str, uint len)
00415 {
00416
00417 char case_str[CASE_GENDER_LEN];
00418 len = min(lengthof(case_str) - 1, len);
00419 memcpy(case_str, str, len);
00420 case_str[len] = '\0';
00421
00422 uint8 case_idx = _lang.GetCaseIndex(case_str);
00423 if (case_idx >= MAX_NUM_CASES) error("Invalid case-name '%s'", case_str);
00424 return case_idx + 1;
00425 }
00426
00427
00428
00429
00430 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00431 {
00432 const char *s = *str, *start;
00433 char c;
00434
00435 *argno = -1;
00436 *casei = -1;
00437
00438
00439 for (; *s != '{'; s++) {
00440 if (*s == '\0') return NULL;
00441 }
00442 s++;
00443
00444 if (*s >= '0' && *s <= '9') {
00445 char *end;
00446
00447 *argno = strtoul(s, &end, 0);
00448 if (*end != ':') error("missing arg #");
00449 s = end + 1;
00450 }
00451
00452
00453 start = s;
00454 do {
00455 c = *s++;
00456 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00457
00458 const CmdStruct *cmd = FindCmd(start, s - start - 1);
00459 if (cmd == NULL) {
00460 strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00461 return NULL;
00462 }
00463
00464 if (c == '.') {
00465 const char *casep = s;
00466
00467 if (!(cmd->flags & C_CASE)) {
00468 error("Command '%s' can't have a case", cmd->cmd);
00469 }
00470
00471 do {
00472 c = *s++;
00473 } while (c != '}' && c != ' ' && c != '\0');
00474 *casei = ResolveCaseName(casep, s - casep - 1);
00475 }
00476
00477 if (c == '\0') {
00478 strgen_error("Missing } from command '%s'", start);
00479 return NULL;
00480 }
00481
00482
00483 if (c != '}') {
00484 if (c == '=') s--;
00485
00486 start = s;
00487 for (;;) {
00488 c = *s++;
00489 if (c == '}') break;
00490 if (c == '\0') {
00491 strgen_error("Missing } from command '%s'", start);
00492 return NULL;
00493 }
00494 if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
00495 *param++ = c;
00496 }
00497 }
00498 *param = '\0';
00499
00500 *str = s;
00501
00502 return cmd;
00503 }
00504
00505
00506 static void HandlePragma(char *str, bool master)
00507 {
00508 if (!memcmp(str, "id ", 3)) {
00509 _next_string_id = strtoul(str + 3, NULL, 0);
00510 } else if (!memcmp(str, "name ", 5)) {
00511 strecpy(_lang.name, str + 5, lastof(_lang.name));
00512 } else if (!memcmp(str, "ownname ", 8)) {
00513 strecpy(_lang.own_name, str + 8, lastof(_lang.own_name));
00514 } else if (!memcmp(str, "isocode ", 8)) {
00515 strecpy(_lang.isocode, str + 8, lastof(_lang.isocode));
00516 } else if (!memcmp(str, "plural ", 7)) {
00517 _lang.plural_form = atoi(str + 7);
00518 if (_lang.plural_form >= lengthof(_plural_forms)) {
00519 error("Invalid pluralform %d", _lang.plural_form);
00520 }
00521 } else if (!memcmp(str, "textdir ", 8)) {
00522 if (!memcmp(str + 8, "ltr", 3)) {
00523 _lang.text_dir = TD_LTR;
00524 } else if (!memcmp(str + 8, "rtl", 3)) {
00525 _lang.text_dir = TD_RTL;
00526 } else {
00527 error("Invalid textdir %s", str + 8);
00528 }
00529 } else if (!memcmp(str, "digitsep ", 9)) {
00530 str += 9;
00531 strecpy(_lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator));
00532 } else if (!memcmp(str, "digitsepcur ", 12)) {
00533 str += 12;
00534 strecpy(_lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator_currency));
00535 } else if (!memcmp(str, "decimalsep ", 11)) {
00536 str += 11;
00537 strecpy(_lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_decimal_separator));
00538 } else if (!memcmp(str, "winlangid ", 10)) {
00539 const char *buf = str + 10;
00540 long langid = strtol(buf, NULL, 16);
00541 if (langid > (long)UINT16_MAX || langid < 0) {
00542 error("Invalid winlangid %s", buf);
00543 }
00544 _lang.winlangid = (uint16)langid;
00545 } else if (!memcmp(str, "grflangid ", 10)) {
00546 const char *buf = str + 10;
00547 long langid = strtol(buf, NULL, 16);
00548 if (langid >= 0x7F || langid < 0) {
00549 error("Invalid grflangid %s", buf);
00550 }
00551 _lang.newgrflangid = (uint8)langid;
00552 } else if (!memcmp(str, "gender ", 7)) {
00553 if (master) error("Genders are not allowed in the base translation.");
00554 char *buf = str + 7;
00555
00556 for (;;) {
00557 const char *s = ParseWord(&buf);
00558
00559 if (s == NULL) break;
00560 if (_lang.num_genders >= MAX_NUM_GENDERS) error("Too many genders, max %d", MAX_NUM_GENDERS);
00561 strecpy(_lang.genders[_lang.num_genders], s, lastof(_lang.genders[_lang.num_genders]));
00562 _lang.num_genders++;
00563 }
00564 } else if (!memcmp(str, "case ", 5)) {
00565 if (master) error("Cases are not allowed in the base translation.");
00566 char *buf = str + 5;
00567
00568 for (;;) {
00569 const char *s = ParseWord(&buf);
00570
00571 if (s == NULL) break;
00572 if (_lang.num_cases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00573 strecpy(_lang.cases[_lang.num_cases], s, lastof(_lang.cases[_lang.num_cases]));
00574 _lang.num_cases++;
00575 }
00576 } else {
00577 error("unknown pragma '%s'", str);
00578 }
00579 }
00580
00581 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00582 {
00583 char param[MAX_COMMAND_PARAM_SIZE];
00584 int argno;
00585 int argidx = 0;
00586 int casei;
00587
00588 memset(p, 0, sizeof(*p));
00589
00590 for (;;) {
00591
00592 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00593
00594 if (ar == NULL) break;
00595
00596
00597 if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
00598
00599 if (ar->consumes) {
00600 if (argno != -1) argidx = argno;
00601 if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
00602 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
00603
00604 p->cmd[argidx++] = ar;
00605 } else if (!(ar->flags & C_DONTCOUNT)) {
00606 if (p->np >= lengthof(p->pairs)) error("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00607 p->pairs[p->np].a = ar;
00608 p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00609 p->np++;
00610 }
00611 }
00612 }
00613
00614
00615 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00616 {
00617 if (a == NULL) return NULL;
00618
00619 if (strcmp(a->cmd, "STRING1") == 0 ||
00620 strcmp(a->cmd, "STRING2") == 0 ||
00621 strcmp(a->cmd, "STRING3") == 0 ||
00622 strcmp(a->cmd, "STRING4") == 0 ||
00623 strcmp(a->cmd, "STRING5") == 0 ||
00624 strcmp(a->cmd, "RAW_STRING") == 0) {
00625 return FindCmd("STRING", 6);
00626 }
00627
00628 return a;
00629 }
00630
00631
00632 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00633 {
00634
00635
00636
00637
00638 if (!_translation) return true;
00639
00640 ParsedCommandStruct templ;
00641 ParsedCommandStruct lang;
00642 bool result = true;
00643
00644 ExtractCommandString(&templ, b, true);
00645 ExtractCommandString(&lang, a, true);
00646
00647
00648 if (templ.np != lang.np) {
00649 strgen_warning("%s: template string and language string have a different # of commands", name);
00650 result = false;
00651 }
00652
00653 for (uint i = 0; i < templ.np; i++) {
00654
00655 bool found = false;
00656 for (uint j = 0; j < lang.np; j++) {
00657 if (templ.pairs[i].a == lang.pairs[j].a &&
00658 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00659
00660 lang.pairs[j].a = NULL;
00661 found = true;
00662 break;
00663 }
00664 }
00665
00666 if (!found) {
00667 strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00668 result = false;
00669 }
00670 }
00671
00672
00673
00674 for (uint i = 0; i < lengthof(templ.cmd); i++) {
00675 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00676 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00677 lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00678 templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00679 result = false;
00680 }
00681 }
00682
00683 return result;
00684 }
00685
00686 static void HandleString(char *str, bool master)
00687 {
00688 if (*str == '#') {
00689 if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2, master);
00690 return;
00691 }
00692
00693
00694 if (*str == ';' || *str == ' ' || *str == '\0') return;
00695
00696 char *s = strchr(str, ':');
00697 if (s == NULL) {
00698 strgen_error("Line has no ':' delimiter");
00699 return;
00700 }
00701
00702 char *t;
00703
00704
00705 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00706 *t = 0;
00707 s++;
00708
00709
00710 const char *tmp;
00711 for (tmp = s; *tmp != '\0';) {
00712 size_t len = Utf8Validate(tmp);
00713 if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
00714
00715 WChar c;
00716 Utf8Decode(&c, tmp);
00717 if (c <= 0x001F ||
00718 (c >= 0xE000 && c <= 0xF8FF) ||
00719 (c >= 0xFFF0 && c <= 0xFFFF)) {
00720 error("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
00721 }
00722
00723 tmp += len;
00724 }
00725
00726
00727
00728 char *casep = strchr(str, '.');
00729 if (casep) *casep++ = '\0';
00730
00731
00732 LangString *ent = HashFind(str);
00733
00734 if (master) {
00735 if (casep != NULL) {
00736 strgen_error("Cases in the base translation are not supported.");
00737 return;
00738 }
00739
00740 if (ent != NULL) {
00741 strgen_error("String name '%s' is used multiple times", str);
00742 return;
00743 }
00744
00745 if (_strings[_next_string_id]) {
00746 strgen_error("String ID 0x%X for '%s' already in use by '%s'", _next_string_id, str, _strings[_next_string_id]->name);
00747 return;
00748 }
00749
00750
00751 ent = CallocT<LangString>(1);
00752 _strings[_next_string_id] = ent;
00753 ent->index = _next_string_id++;
00754 ent->name = strdup(str);
00755 ent->line = _cur_line;
00756
00757 HashAdd(str, ent);
00758
00759 ent->english = strdup(s);
00760 } else {
00761 if (ent == NULL) {
00762 strgen_warning("String name '%s' does not exist in master file", str);
00763 return;
00764 }
00765
00766 if (ent->translated && casep == NULL) {
00767 strgen_error("String name '%s' is used multiple times", str);
00768 return;
00769 }
00770
00771
00772 if (!CheckCommandsMatch(s, ent->english, str)) return;
00773
00774 if (casep != NULL) {
00775 Case *c = MallocT<Case>(1);
00776
00777 c->caseidx = ResolveCaseName(casep, strlen(casep));
00778 c->string = strdup(s);
00779 c->next = ent->translated_case;
00780 ent->translated_case = c;
00781 } else {
00782 ent->translated = strdup(s);
00783
00784
00785
00786 ent->line = _cur_line;
00787 }
00788 }
00789 }
00790
00791
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * @param buf The string to trim.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end > buf) {
		const char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}
	*end = '\0';
}
00798
00799
00800 static void ParseFile(const char *file, bool english)
00801 {
00802 FILE *in;
00803 char buf[2048];
00804
00805
00806 const char *cur_file = strrchr(_file, PATHSEPCHAR);
00807 const char *next_file = strrchr(file, PATHSEPCHAR);
00808 _translation = next_file != NULL && cur_file != NULL && strcmp(cur_file, next_file) != 0;
00809 _file = file;
00810
00811
00812 MemSetT(&_lang, 0);
00813 strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
00814 strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
00815 strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
00816
00817 in = fopen(file, "r");
00818 if (in == NULL) error("Cannot open file");
00819 _cur_line = 1;
00820 while (fgets(buf, sizeof(buf), in) != NULL) {
00821 rstrip(buf);
00822 HandleString(buf, english);
00823 _cur_line++;
00824 }
00825 fclose(in);
00826
00827 if (StrEmpty(_lang.name) || StrEmpty(_lang.own_name) || StrEmpty(_lang.isocode)) {
00828 error("Language must include ##name, ##ownname and ##isocode");
00829 }
00830 }
00831
00832
00833 static uint32 MyHashStr(uint32 hash, const char *s)
00834 {
00835 for (; *s != '\0'; s++) {
00836 hash = ROL(hash, 3) ^ *s;
00837 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00838 }
00839 return hash;
00840 }
00841
00842
00843
00844 static void MakeHashOfStrings()
00845 {
00846 uint32 hash = 0;
00847 uint i;
00848
00849 for (i = 0; i != lengthof(_strings); i++) {
00850 const LangString *ls = _strings[i];
00851
00852 if (ls != NULL) {
00853 const CmdStruct *cs;
00854 const char *s;
00855 char buf[MAX_COMMAND_PARAM_SIZE];
00856 int argno;
00857 int casei;
00858
00859 s = ls->name;
00860 hash ^= i * 0x717239;
00861 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00862 hash = MyHashStr(hash, s + 1);
00863
00864 s = ls->english;
00865 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00866 if (cs->flags & C_DONTCOUNT) continue;
00867
00868 hash ^= (cs - _cmd_structs) * 0x1234567;
00869 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00870 }
00871 }
00872 }
00873 _hash = hash;
00874 }
00875
00876
00877 static uint CountInUse(uint grp)
00878 {
00879 int i;
00880
00881 for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
00882 return i + 1;
00883 }
00884
00885
00886 bool CompareFiles(const char *n1, const char *n2)
00887 {
00888 FILE *f2 = fopen(n2, "rb");
00889 if (f2 == NULL) return false;
00890
00891 FILE *f1 = fopen(n1, "rb");
00892 if (f1 == NULL) error("can't open %s", n1);
00893
00894 size_t l1, l2;
00895 do {
00896 char b1[4096];
00897 char b2[4096];
00898 l1 = fread(b1, 1, sizeof(b1), f1);
00899 l2 = fread(b2, 1, sizeof(b2), f2);
00900
00901 if (l1 != l2 || memcmp(b1, b2, l1)) {
00902 fclose(f2);
00903 fclose(f1);
00904 return false;
00905 }
00906 } while (l1);
00907
00908 fclose(f2);
00909 fclose(f1);
00910 return true;
00911 }
00912
00913
00914 static void WriteStringsH(const char *filename)
00915 {
00916 int next = -1;
00917
00918 _output_filename = "tmp.xxx";
00919 _output_file = fopen(_output_filename, "w");
00920 if (_output_file == NULL) error("can't open tmp.xxx");
00921
00922 fprintf(_output_file, "/* This file is automatically generated. Do not modify */\n\n");
00923 fprintf(_output_file, "#ifndef TABLE_STRINGS_H\n");
00924 fprintf(_output_file, "#define TABLE_STRINGS_H\n");
00925
00926 for (int i = 0; i != lengthof(_strings); i++) {
00927 if (_strings[i] != NULL) {
00928 if (next != i) fprintf(_output_file, "\n");
00929 fprintf(_output_file, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
00930 next = i + 1;
00931 }
00932 }
00933
00934 fprintf(_output_file, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n\n", next - 1);
00935
00936
00937 int max_plural_forms = 0;
00938 for (uint i = 0; i < lengthof(_plural_forms); i++) {
00939 max_plural_forms = max(max_plural_forms, _plural_forms[i].plural_count);
00940 }
00941
00942 fprintf(_output_file,
00943 "static const uint LANGUAGE_PACK_VERSION = 0x%X;\n"
00944 "static const uint LANGUAGE_MAX_PLURAL = %d;\n"
00945 "static const uint LANGUAGE_MAX_PLURAL_FORMS = %d;\n\n",
00946 (uint)_hash, (uint)lengthof(_plural_forms), max_plural_forms
00947 );
00948
00949 fprintf(_output_file, "#endif /* TABLE_STRINGS_H */\n");
00950
00951 fclose(_output_file);
00952 _output_file = NULL;
00953
00954 if (CompareFiles(_output_filename, filename)) {
00955
00956 unlink(_output_filename);
00957 } else {
00958
00959 #if defined(WIN32) || defined(WIN64)
00960 unlink(filename);
00961 #endif
00962 if (rename(_output_filename, filename) == -1) error("rename() failed");
00963 }
00964 _output_filename = NULL;
00965 }
00966
00967 static int TranslateArgumentIdx(int argidx, int offset)
00968 {
00969 int sum;
00970
00971 if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00972 error("invalid argidx %d", argidx);
00973 }
00974 const CmdStruct *cs = _cur_pcs.cmd[argidx];
00975 if (cs != NULL && cs->consumes <= offset) {
00976 error("invalid argidx offset %d:%d", argidx, offset);
00977 }
00978
00979 if (_cur_pcs.cmd[argidx] == NULL) {
00980 error("no command for this argidx %d", argidx);
00981 }
00982
00983 for (int i = sum = 0; i < argidx; i++) {
00984 const CmdStruct *cs = _cur_pcs.cmd[i];
00985
00986 sum += (cs != NULL) ? cs->consumes : 1;
00987 }
00988
00989 return sum + offset;
00990 }
00991
00992 static void PutArgidxCommand()
00993 {
00994 PutUtf8(SCC_ARG_INDEX);
00995 PutByte(TranslateArgumentIdx(_cur_argidx));
00996 }
00997
00998
00999 static void PutCommandString(const char *str)
01000 {
01001 _cur_argidx = 0;
01002
01003 while (*str != '\0') {
01004
01005 if (*str != '{') {
01006 PutByte(*str++);
01007 continue;
01008 }
01009
01010 char param[MAX_COMMAND_PARAM_SIZE];
01011 int argno;
01012 int casei;
01013 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
01014 if (cs == NULL) break;
01015
01016 if (casei != -1) {
01017 PutUtf8(SCC_SETCASE);
01018 PutByte(casei);
01019 }
01020
01021
01022 if (cs->consumes > 0) {
01023
01024 if (argno != -1 && argno != _cur_argidx) {
01025 _cur_argidx = argno;
01026 PutArgidxCommand();
01027 }
01028
01029
01030 cs = _cur_pcs.cmd[_cur_argidx++];
01031 if (cs == NULL) {
01032 error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01033 }
01034 }
01035
01036 cs->proc(param, cs->value);
01037 }
01038 }
01039
01040 static void WriteLength(FILE *f, uint length)
01041 {
01042 if (length < 0xC0) {
01043 fputc(length, f);
01044 } else if (length < 0x4000) {
01045 fputc((length >> 8) | 0xC0, f);
01046 fputc(length & 0xFF, f);
01047 } else {
01048 error("string too long");
01049 }
01050 }
01051
01052
01053 static void WriteLangfile(const char *filename)
01054 {
01055 uint in_use[32];
01056
01057 _output_filename = filename;
01058 _output_file = fopen(filename, "wb");
01059 if (_output_file == NULL) error("can't open %s", filename);
01060
01061 for (int i = 0; i != 32; i++) {
01062 uint n = CountInUse(i);
01063
01064 in_use[i] = n;
01065 _lang.offsets[i] = TO_LE16(n);
01066 }
01067
01068 _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
01069 _lang.version = TO_LE32(_hash);
01070 _lang.winlangid = TO_LE16(_lang.winlangid);
01071
01072 fwrite(&_lang, sizeof(_lang), 1, _output_file);
01073
01074 for (int i = 0; i != 32; i++) {
01075 for (uint j = 0; j != in_use[i]; j++) {
01076 const LangString *ls = _strings[(i << 11) + j];
01077 const Case *casep;
01078 const char *cmdp;
01079
01080
01081 if (ls == NULL) {
01082 WriteLength(_output_file, 0);
01083 continue;
01084 }
01085
01086 _cur_ident = ls->name;
01087 _cur_line = ls->line;
01088
01089
01090 if (_show_todo > 0 && ls->translated == NULL) {
01091 if ((_show_todo & 2) != 0) {
01092 strgen_warning("'%s' is untranslated", ls->name);
01093 }
01094 if ((_show_todo & 1) != 0) {
01095 const char *s = "<TODO> ";
01096 while (*s != '\0') PutByte(*s++);
01097 }
01098 }
01099
01100
01101 ExtractCommandString(&_cur_pcs, ls->english, false);
01102
01103 if (ls->translated_case != NULL || ls->translated != NULL) {
01104 casep = ls->translated_case;
01105 cmdp = ls->translated;
01106 } else {
01107 casep = NULL;
01108 cmdp = ls->english;
01109 }
01110
01111 _translated = _masterlang || (cmdp != ls->english);
01112
01113 if (casep != NULL) {
01114 const Case *c;
01115 uint num;
01116
01117
01118
01119
01120
01121 PutUtf8(SCC_SWITCH_CASE);
01122
01123 for (num = 0, c = casep; c; c = c->next) num++;
01124 PutByte(num);
01125
01126
01127 for (c = casep; c != NULL; c = c->next) {
01128 uint pos;
01129
01130 PutByte(c->caseidx);
01131
01132 pos = _put_pos;
01133 PutByte(0);
01134 PutByte(0);
01135
01136 PutCommandString(c->string);
01137 PutByte(0);
01138
01139 _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01140 _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01141 }
01142 }
01143
01144 if (cmdp != NULL) PutCommandString(cmdp);
01145
01146 WriteLength(_output_file, _put_pos);
01147 fwrite(_put_buf, 1, _put_pos, _output_file);
01148 _put_pos = 0;
01149 }
01150 }
01151
01152 fputc(0, _output_file);
01153 fclose(_output_file);
01154
01155 _output_file = NULL;
01156 _output_filename = NULL;
01157 }
01158
01160 static inline void ottd_mkdir(const char *directory)
01161 {
01162 #if defined(WIN32) || defined(__WATCOMC__)
01163 mkdir(directory);
01164 #else
01165 mkdir(directory, 0755);
01166 #endif
01167 }
01168
01174 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01175 {
01176 ttd_strlcpy(buf, path, buflen);
01177
01178 char *p = strchr(buf, '\0');
01179 if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01180 ttd_strlcpy(p, file, buflen - (size_t)(p - buf));
01181 return buf;
01182 }
01183
#if !defined(__MINGW32__)
/**
 * Nothing needs converting in this configuration.
 * @param s The path.
 * @return s, unchanged.
 */
static inline char *replace_pathsep(char *s)
{
	return s;
}
#else
/**
 * Replace every forward slash in a path by a backslash, in place.
 * @param s The path to convert.
 * @return s.
 */
static inline char *replace_pathsep(char *s)
{
	char *c = s;
	while (*c != '\0') {
		if (*c == '/') *c = '\\';
		c++;
	}
	return s;
}
#endif
01198
01199 int CDECL main(int argc, char *argv[])
01200 {
01201 char pathbuf[MAX_PATH];
01202 const char *src_dir = ".";
01203 const char *dest_dir = NULL;
01204
01205 while (argc > 1 && *argv[1] == '-') {
01206 if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
01207 puts("$Revision: 21474 $");
01208 return 0;
01209 }
01210
01211 if (strcmp(argv[1], "-export-commands") == 0) {
01212 printf("args\tflags\tcommand\treplacement\n");
01213 for (const CmdStruct *cs = _cmd_structs; cs < endof(_cmd_structs); cs++) {
01214 char flags;
01215 switch (cs->value) {
01216 case 0x200E: case 0x200F:
01217 case 0x202A: case 0x202B: case 0x202C: case 0x202D: case 0x202E:
01218 case 0xA0:
01219 case '\n':
01220 case '{':
01221
01222 flags = 'i';
01223 break;
01224
01225 default:
01226 if (cs->proc == EmitGender) {
01227 flags = 'g';
01228 } else if (cs->proc == EmitPlural) {
01229 flags = 'p';
01230 } else {
01231 flags = '0';
01232 }
01233 }
01234 printf("%i\t%c\t\"%s\"\t\"%s\"\n", cs->consumes, flags, cs->cmd, strstr(cs->cmd, "STRING") ? "STRING" : cs->cmd);
01235 }
01236 return 0;
01237 }
01238
01239 if (strcmp(argv[1], "-export-plurals") == 0) {
01240 printf("count\tdescription\n");
01241 for (const PluralForm *pf = _plural_forms; pf < endof(_plural_forms); pf++) {
01242 printf("%i\t\"%s\"\n", pf->plural_count, pf->description);
01243 }
01244 return 0;
01245 }
01246
01247 if (strcmp(argv[1], "-export-pragmas") == 0) {
01248 printf("name\tflags\tdefault\tdescription\n");
01249 for (size_t i = 0; i < lengthof(_pragmas); i++) {
01250 printf("\"%s\"\t%s\t\"%s\"\t\"%s\"\n",
01251 _pragmas[i][0], _pragmas[i][1], _pragmas[i][2], _pragmas[i][3]);
01252 }
01253 return 0;
01254 }
01255
01256 if (strcmp(argv[1], "-t") == 0 || strcmp(argv[1], "--todo") == 0) {
01257 _show_todo |= 1;
01258 argc--, argv++;
01259 continue;
01260 }
01261
01262 if (strcmp(argv[1], "-w") == 0 || strcmp(argv[1], "--warning") == 0) {
01263 _show_todo |= 2;
01264 argc--, argv++;
01265 continue;
01266 }
01267
01268 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
01269 puts(
01270 "strgen - $Revision: 21474 $\n"
01271 " -v | --version print version information and exit\n"
01272 " -t | --todo replace any untranslated strings with '<TODO>'\n"
01273 " -w | --warning print a warning for any untranslated strings\n"
01274 " -h | -? | --help print this help message and exit\n"
01275 " -s | --source_dir search for english.txt in the specified directory\n"
01276 " -d | --dest_dir put output file in the specified directory, create if needed\n"
01277 " -export-commands export all commands and exit\n"
01278 " -export-plurals export all plural forms and exit\n"
01279 " -export-pragmas export all pragmas and exit\n"
01280 " Run without parameters and strgen will search for english.txt and parse it,\n"
01281 " creating strings.h. Passing an argument, strgen will translate that language\n"
01282 " file using english.txt as a reference and output <language>.lng."
01283 );
01284 return 0;
01285 }
01286
01287 if (argc > 2 && (strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source_dir") == 0)) {
01288 src_dir = replace_pathsep(argv[2]);
01289 argc -= 2, argv += 2;
01290 continue;
01291 }
01292
01293 if (argc > 2 && (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--dest_dir") == 0)) {
01294 dest_dir = replace_pathsep(argv[2]);
01295 argc -= 2, argv += 2;
01296 continue;
01297 }
01298
01299 fprintf(stderr, "Invalid arguments\n");
01300 return 0;
01301 }
01302
01303 if (dest_dir == NULL) dest_dir = src_dir;
01304
01305
01306
01307
01308
01309 if (argc == 1) {
01310 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01311
01312
01313 _masterlang = true;
01314 ParseFile(pathbuf, true);
01315 MakeHashOfStrings();
01316 if (_errors) return 1;
01317
01318
01319 ottd_mkdir(dest_dir);
01320 mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01321 WriteStringsH(pathbuf);
01322 } else if (argc == 2) {
01323 char *r;
01324
01325 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01326
01327
01328 _masterlang = false;
01329 ParseFile(pathbuf, true);
01330 MakeHashOfStrings();
01331 ParseFile(replace_pathsep(argv[1]), false);
01332 if (_errors) return 1;
01333
01334
01335 r = strrchr(argv[1], PATHSEPCHAR);
01336 mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : argv[1]);
01337
01338
01339 r = strrchr(pathbuf, '.');
01340 if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01341 ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01342 WriteLangfile(pathbuf);
01343
01344
01345 if ((_show_todo & 2) != 0) {
01346 fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01347 }
01348 } else {
01349 fprintf(stderr, "Invalid arguments\n");
01350 }
01351
01352 return 0;
01353 }