00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #include "../stdafx.h"
00013 #include "../core/alloc_func.hpp"
00014 #include "../core/endian_func.hpp"
00015 #include "../string_func.h"
00016 #include "../strings_type.h"
00017 #include "strgen.h"
00018 #include "../table/control_codes.h"
00019
00020 #include <stdarg.h>
00021
00022 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00023 #include <unistd.h>
00024 #include <sys/stat.h>
00025 #endif
00026
00027 #if defined WIN32 || defined __WATCOMC__
00028 #include <direct.h>
00029 #endif
00030
00031 #ifdef __MORPHOS__
00032 #ifdef stderr
00033 #undef stderr
00034 #endif
00035 #define stderr stdout
00036 #endif
00037
00038 #include "../table/strgen_tables.h"
00039
00040
00041
/** One case-specific variant of a string, kept in a singly linked list. */
struct Case {
	int caseidx;  ///< Case number as returned by ResolveCaseName().
	char *string; ///< Text of this variant (a strdup() copy of the source line).
	Case *next;   ///< Next variant of the same string, or NULL at the end of the list.
};
00047
static bool _masterlang;  ///< Read by WriteLangfile(): when set, every string counts as translated.
static bool _translated;  ///< Whether the string currently being compiled is a translated one.
static bool _translation; ///< Set by ParseFile(); CheckCommandsMatch() only validates when this is true.

static const char *_file = "(unknown file)";  ///< File name used in diagnostics.
static int _cur_line;                         ///< Line number used in diagnostics.

static FILE *_output_file = NULL;             ///< Currently open output file; closed by error() on a fatal error.
static const char *_output_filename = NULL;   ///< Name of the output file; removed by error() on a fatal error.

static int _errors;    ///< Number of errors reported through strgen_error().
static int _warnings;  ///< Number of warnings reported through strgen_warning().
static int _show_todo; ///< Bit 0: prefix untranslated strings with "<TODO> "; bit 1: warn about untranslated strings.
00056
00057 struct LangString {
00058 char *name;
00059 char *english;
00060 char *translated;
00061 uint16 hash_next;
00062 uint16 index;
00063 int line;
00064 Case *english_case;
00065 Case *translated_case;
00066 };
00067
00068 static LangString *_strings[65536];
00069
00070
00071 #define HASH_SIZE 32767
00072 static uint16 _hash_head[HASH_SIZE];
00073
00074 static byte _put_buf[4096];
00075 static int _put_pos;
00076 static int _next_string_id;
00077
00078 static uint32 _hash;
00079 static char _lang_name[32], _lang_ownname[32], _lang_isocode[16];
00080 static char _lang_digit_group_separator[8];
00081 static char _lang_digit_group_separator_currency[8];
00082 static char _lang_digit_decimal_separator[8];
00083 static byte _lang_pluralform;
00084 static byte _lang_textdir;
00085 static uint16 _lang_winlangid;
00086 static uint8 _lang_newgrflangid;
00087 #define MAX_NUM_GENDER 8
00088 static char _genders[MAX_NUM_GENDER][16];
00089 static uint _numgenders;
00090
00091
00092 #define MAX_NUM_CASES 50
00093 static char _cases[MAX_NUM_CASES][16];
00094 static uint _numcases;
00095
00096 static const char *_cur_ident;
00097
00098 struct CmdPair {
00099 const CmdStruct *a;
00100 const char *v;
00101 };
00102
00103 struct ParsedCommandStruct {
00104 uint np;
00105 CmdPair pairs[32];
00106 const CmdStruct *cmd[32];
00107 };
00108
00109
00110 static ParsedCommandStruct _cur_pcs;
00111 static int _cur_argidx;
00112
00113 static uint HashStr(const char *s)
00114 {
00115 uint hash = 0;
00116 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00117 return hash % HASH_SIZE;
00118 }
00119
00120 static void HashAdd(const char *s, LangString *ls)
00121 {
00122 uint hash = HashStr(s);
00123 ls->hash_next = _hash_head[hash];
00124 _hash_head[hash] = ls->index + 1;
00125 }
00126
00127 static LangString *HashFind(const char *s)
00128 {
00129 int idx = _hash_head[HashStr(s)];
00130
00131 while (--idx >= 0) {
00132 LangString *ls = _strings[idx];
00133
00134 if (strcmp(ls->name, s) == 0) return ls;
00135 idx = ls->hash_next;
00136 }
00137 return NULL;
00138 }
00139
/*
 * printf format for a diagnostic; the arguments are file name, line number
 * and message text. 's' is the severity as a string literal.
 * MSVC builds use the "file (line): warning: ..." layout, all other builds
 * the GNU "file:line: severity: message" layout so editors and build tools
 * can locate the offending line.
 */
#ifdef _MSC_VER
# define LINE_NUM_FMT(s) "%s (%d): warning: %s (" s ")\n"
#else
# define LINE_NUM_FMT(s) "%s:%d: " s ": %s\n"
#endif
00145
00146 static void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
00147
00148 static void CDECL strgen_warning(const char *s, ...)
00149 {
00150 char buf[1024];
00151 va_list va;
00152 va_start(va, s);
00153 vsnprintf(buf, lengthof(buf), s, va);
00154 va_end(va);
00155 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, buf);
00156 _warnings++;
00157 }
00158
00159 static void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
00160
00161 static void CDECL strgen_error(const char *s, ...)
00162 {
00163 char buf[1024];
00164 va_list va;
00165 va_start(va, s);
00166 vsnprintf(buf, lengthof(buf), s, va);
00167 va_end(va);
00168 fprintf(stderr, LINE_NUM_FMT("error"), _file, _cur_line, buf);
00169 _errors++;
00170 }
00171
00172 void NORETURN CDECL error(const char *s, ...)
00173 {
00174 char buf[1024];
00175 va_list va;
00176 va_start(va, s);
00177 vsnprintf(buf, lengthof(buf), s, va);
00178 va_end(va);
00179 fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
00180 #ifdef _MSC_VER
00181 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
00182 #endif
00183
00184 if (_output_file != NULL) {
00185 fclose(_output_file);
00186 unlink(_output_filename);
00187 }
00188 exit(1);
00189 }
00190
00191 static void PutByte(byte c)
00192 {
00193 if (_put_pos == lengthof(_put_buf)) error("Put buffer too small");
00194 _put_buf[_put_pos++] = c;
00195 }
00196
00197
00198 static void PutUtf8(uint32 value)
00199 {
00200 if (value < 0x80) {
00201 PutByte(value);
00202 } else if (value < 0x800) {
00203 PutByte(0xC0 + GB(value, 6, 5));
00204 PutByte(0x80 + GB(value, 0, 6));
00205 } else if (value < 0x10000) {
00206 PutByte(0xE0 + GB(value, 12, 4));
00207 PutByte(0x80 + GB(value, 6, 6));
00208 PutByte(0x80 + GB(value, 0, 6));
00209 } else if (value < 0x110000) {
00210 PutByte(0xF0 + GB(value, 18, 3));
00211 PutByte(0x80 + GB(value, 12, 6));
00212 PutByte(0x80 + GB(value, 6, 6));
00213 PutByte(0x80 + GB(value, 0, 6));
00214 } else {
00215 strgen_warning("Invalid unicode value U+0x%X", value);
00216 }
00217 }
00218
00219
00220 size_t Utf8Validate(const char *s)
00221 {
00222 uint32 c;
00223
00224 if (!HasBit(s[0], 7)) {
00225
00226 return 1;
00227 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00228
00229 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00230 if (c >= 0x80) return 2;
00231 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00232
00233 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00234 if (c >= 0x800) return 3;
00235 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00236
00237 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00238 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00239 }
00240
00241 return 0;
00242 }
00243
00244
00245 static void EmitSingleChar(char *buf, int value)
00246 {
00247 if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00248 PutUtf8(value);
00249 }
00250
00251
00252
00253
00254
00255
00256
00257
/**
 * Parse an absolute or relative number, optionally followed by ":<offset>".
 *
 * A number with a leading '+' or a negative number is added to *value; any
 * other number replaces *value.
 *
 * @param buf    [in,out] Text to parse; moved past the parsed part on success.
 * @param value  [in,out] Number to update.
 * @param offset [out] Receives the number after the ':'; may be NULL, in
 *               which case a ':' is left unparsed.
 * @return true when a number was parsed. When the text after the ':' is not
 *         a number false is returned although *value was already updated.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	char *stop;

	/* Skip leading blanks. */
	for (; *cursor == ' ' || *cursor == '\t'; cursor++) {}

	const bool relative = (*cursor == '+');
	if (relative) cursor++;

	int number = strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	if (relative || number < 0) {
		*value += number;
	} else {
		*value = number;
	}

	if (offset != NULL && *stop == ':') {
		/* An offset follows the number. */
		cursor = stop + 1;
		*offset = strtol(cursor, &stop, 0);
		if (stop == cursor) return false;
	}

	*buf = stop;
	return true;
}
00285
00286
/**
 * Split the next word off a string, in place.
 *
 * A word is either a run of characters up to the next blank, or, when it
 * starts with a double quote, everything up to the closing quote.
 *
 * @param buf [in,out] Text to parse; moved past the word and its delimiter.
 * @return The NUL-terminated word, or NULL when only blanks are left.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;

	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return NULL;

	char *word;
	if (*cursor == '"') {
		/* Quoted word: ends at the closing quote or at the end of the text. */
		word = ++cursor;
		while (*cursor != '\0' && *cursor != '"') cursor++;
	} else {
		/* Plain word: ends at the next blank or at the end of the text. */
		word = cursor;
		while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t') cursor++;
	}

	/* Cut the word off at its delimiter and continue after it. */
	if (*cursor != '\0') *cursor++ = '\0';

	*buf = cursor;
	return word;
}
00320
00321
/* Defined further down; needed here by the plural and gender emitters. */
static int TranslateArgumentIdx(int argidx, int offset = 0);
00323
00324 static void EmitWordList(const char * const *words, uint nw)
00325 {
00326 PutByte(nw);
00327 for (uint i = 0; i < nw; i++) PutByte(strlen(words[i]) + 1);
00328 for (uint i = 0; i < nw; i++) {
00329 for (uint j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00330 PutByte(0);
00331 }
00332 }
00333
00334 static void EmitPlural(char *buf, int value)
00335 {
00336 int argidx = _cur_argidx;
00337 int offset = 0;
00338 const char *words[5];
00339 int nw = 0;
00340
00341
00342 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00343
00344
00345 for (nw = 0; nw < 5; nw++) {
00346 words[nw] = ParseWord(&buf);
00347 if (words[nw] == NULL) break;
00348 }
00349
00350 if (nw == 0) {
00351 error("%s: No plural words", _cur_ident);
00352 }
00353
00354 if (_plural_forms[_lang_pluralform].plural_count != nw) {
00355 if (_translated) {
00356 error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00357 _plural_forms[_lang_pluralform].plural_count, nw);
00358 } else {
00359 if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00360 if (nw > _plural_forms[_lang_pluralform].plural_count) {
00361 nw = _plural_forms[_lang_pluralform].plural_count;
00362 } else {
00363 for (; nw < _plural_forms[_lang_pluralform].plural_count; nw++) {
00364 words[nw] = words[nw - 1];
00365 }
00366 }
00367 }
00368 }
00369
00370 PutUtf8(SCC_PLURAL_LIST);
00371 PutByte(TranslateArgumentIdx(argidx, offset));
00372 EmitWordList(words, nw);
00373 }
00374
00375
00376 static void EmitGender(char *buf, int value)
00377 {
00378 int argidx = _cur_argidx;
00379 int offset = 0;
00380 uint nw;
00381
00382 if (buf[0] == '=') {
00383 buf++;
00384
00385
00386 for (nw = 0; ; nw++) {
00387 if (nw >= MAX_NUM_GENDER) error("G argument '%s' invalid", buf);
00388 if (strcmp(buf, _genders[nw]) == 0) break;
00389 }
00390
00391 PutUtf8(SCC_GENDER_INDEX);
00392 PutByte(nw);
00393 } else {
00394 const char *words[MAX_NUM_GENDER];
00395
00396
00397
00398 if (!ParseRelNum(&buf, &argidx, &offset)) {}
00399
00400 for (nw = 0; nw < MAX_NUM_GENDER; nw++) {
00401 words[nw] = ParseWord(&buf);
00402 if (words[nw] == NULL) break;
00403 }
00404 if (nw != _numgenders) error("Bad # of arguments for gender command");
00405 PutUtf8(SCC_GENDER_LIST);
00406 PutByte(TranslateArgumentIdx(argidx, offset));
00407 EmitWordList(words, nw);
00408 }
00409 }
00410
00411 static const CmdStruct *FindCmd(const char *s, int len)
00412 {
00413 for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00414 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00415 }
00416 return NULL;
00417 }
00418
00419 static uint ResolveCaseName(const char *str, uint len)
00420 {
00421 for (uint i = 0; i < MAX_NUM_CASES; i++) {
00422 if (memcmp(_cases[i], str, len) == 0 && _cases[i][len] == 0) return i + 1;
00423 }
00424 error("Invalid case-name '%s'", str);
00425 }
00426
00427
00428
00429
00430 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00431 {
00432 const char *s = *str, *start;
00433 char c;
00434
00435 *argno = -1;
00436 *casei = -1;
00437
00438
00439 for (; *s != '{'; s++) {
00440 if (*s == '\0') return NULL;
00441 }
00442 s++;
00443
00444 if (*s >= '0' && *s <= '9') {
00445 char *end;
00446
00447 *argno = strtoul(s, &end, 0);
00448 if (*end != ':') error("missing arg #");
00449 s = end + 1;
00450 }
00451
00452
00453 start = s;
00454 do {
00455 c = *s++;
00456 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00457
00458 const CmdStruct *cmd = FindCmd(start, s - start - 1);
00459 if (cmd == NULL) {
00460 strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00461 return NULL;
00462 }
00463
00464 if (c == '.') {
00465 const char *casep = s;
00466
00467 if (!(cmd->flags & C_CASE)) {
00468 error("Command '%s' can't have a case", cmd->cmd);
00469 }
00470
00471 do {
00472 c = *s++;
00473 } while (c != '}' && c != ' ' && c != '\0');
00474 *casei = ResolveCaseName(casep, s - casep - 1);
00475 }
00476
00477 if (c == '\0') {
00478 strgen_error("Missing } from command '%s'", start);
00479 return NULL;
00480 }
00481
00482
00483 if (c != '}') {
00484 if (c == '=') s--;
00485
00486 start = s;
00487 for (;;) {
00488 c = *s++;
00489 if (c == '}') break;
00490 if (c == '\0') {
00491 strgen_error("Missing } from command '%s'", start);
00492 return NULL;
00493 }
00494 if (s - start == 250) error("param command too long");
00495 *param++ = c;
00496 }
00497 }
00498 *param = '\0';
00499
00500 *str = s;
00501
00502 return cmd;
00503 }
00504
00505
00506 static void HandlePragma(char *str)
00507 {
00508 if (!memcmp(str, "id ", 3)) {
00509 _next_string_id = strtoul(str + 3, NULL, 0);
00510 } else if (!memcmp(str, "name ", 5)) {
00511 strecpy(_lang_name, str + 5, lastof(_lang_name));
00512 } else if (!memcmp(str, "ownname ", 8)) {
00513 strecpy(_lang_ownname, str + 8, lastof(_lang_ownname));
00514 } else if (!memcmp(str, "isocode ", 8)) {
00515 strecpy(_lang_isocode, str + 8, lastof(_lang_isocode));
00516 } else if (!memcmp(str, "plural ", 7)) {
00517 _lang_pluralform = atoi(str + 7);
00518 if (_lang_pluralform >= lengthof(_plural_forms))
00519 error("Invalid pluralform %d", _lang_pluralform);
00520 } else if (!memcmp(str, "textdir ", 8)) {
00521 if (!memcmp(str + 8, "ltr", 3)) {
00522 _lang_textdir = TD_LTR;
00523 } else if (!memcmp(str + 8, "rtl", 3)) {
00524 _lang_textdir = TD_RTL;
00525 } else {
00526 error("Invalid textdir %s", str + 8);
00527 }
00528 } else if (!memcmp(str, "digitsep ", 9)) {
00529 str += 9;
00530 strecpy(_lang_digit_group_separator, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_group_separator));
00531 } else if (!memcmp(str, "digitsepcur ", 12)) {
00532 str += 12;
00533 strecpy(_lang_digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_group_separator_currency));
00534 } else if (!memcmp(str, "decimalsep ", 11)) {
00535 str += 11;
00536 strecpy(_lang_digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_decimal_separator));
00537 } else if (!memcmp(str, "winlangid ", 10)) {
00538 const char *buf = str + 10;
00539 long langid = strtol(buf, NULL, 16);
00540 if (langid > (long)UINT16_MAX || langid < 0) {
00541 error("Invalid winlangid %s", buf);
00542 }
00543 _lang_winlangid = (uint16)langid;
00544 } else if (!memcmp(str, "grflangid ", 10)) {
00545 const char *buf = str + 10;
00546 long langid = strtol(buf, NULL, 16);
00547 if (langid >= 0x7F || langid < 0) {
00548 error("Invalid grflangid %s", buf);
00549 }
00550 _lang_newgrflangid = (uint8)langid;
00551 } else if (!memcmp(str, "gender ", 7)) {
00552 char *buf = str + 7;
00553
00554 for (;;) {
00555 const char *s = ParseWord(&buf);
00556
00557 if (s == NULL) break;
00558 if (_numgenders >= MAX_NUM_GENDER) error("Too many genders, max %d", MAX_NUM_GENDER);
00559 strecpy(_genders[_numgenders], s, lastof(_genders[_numgenders]));
00560 _numgenders++;
00561 }
00562 } else if (!memcmp(str, "case ", 5)) {
00563 char *buf = str + 5;
00564
00565 for (;;) {
00566 const char *s = ParseWord(&buf);
00567
00568 if (s == NULL) break;
00569 if (_numcases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00570 strecpy(_cases[_numcases], s, lastof(_cases[_numcases]));
00571 _numcases++;
00572 }
00573 } else {
00574 error("unknown pragma '%s'", str);
00575 }
00576 }
00577
00578 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00579 {
00580 char param[100];
00581 int argno;
00582 int argidx = 0;
00583 int casei;
00584
00585 memset(p, 0, sizeof(*p));
00586
00587 for (;;) {
00588
00589 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00590
00591 if (ar == NULL) break;
00592
00593
00594 if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
00595
00596 if (ar->consumes) {
00597 if (argno != -1) argidx = argno;
00598 if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
00599 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
00600
00601 p->cmd[argidx++] = ar;
00602 } else if (!(ar->flags & C_DONTCOUNT)) {
00603 if (p->np >= lengthof(p->pairs)) error("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00604 p->pairs[p->np].a = ar;
00605 p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00606 p->np++;
00607 }
00608 }
00609 }
00610
00611
00612 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00613 {
00614 if (a == NULL) return NULL;
00615
00616 if (strcmp(a->cmd, "STRING1") == 0 ||
00617 strcmp(a->cmd, "STRING2") == 0 ||
00618 strcmp(a->cmd, "STRING3") == 0 ||
00619 strcmp(a->cmd, "STRING4") == 0 ||
00620 strcmp(a->cmd, "STRING5") == 0 ||
00621 strcmp(a->cmd, "RAW_STRING") == 0) {
00622 return FindCmd("STRING", 6);
00623 }
00624
00625 return a;
00626 }
00627
00628
00629 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00630 {
00631
00632
00633
00634
00635 if (!_translation) return true;
00636
00637 ParsedCommandStruct templ;
00638 ParsedCommandStruct lang;
00639 bool result = true;
00640
00641 ExtractCommandString(&templ, b, true);
00642 ExtractCommandString(&lang, a, true);
00643
00644
00645 if (templ.np != lang.np) {
00646 strgen_warning("%s: template string and language string have a different # of commands", name);
00647 result = false;
00648 }
00649
00650 for (uint i = 0; i < templ.np; i++) {
00651
00652 bool found = false;
00653 for (uint j = 0; j < lang.np; j++) {
00654 if (templ.pairs[i].a == lang.pairs[j].a &&
00655 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00656
00657 lang.pairs[j].a = NULL;
00658 found = true;
00659 break;
00660 }
00661 }
00662
00663 if (!found) {
00664 strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00665 result = false;
00666 }
00667 }
00668
00669
00670
00671 for (uint i = 0; i < lengthof(templ.cmd); i++) {
00672 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00673 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00674 lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00675 templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00676 result = false;
00677 }
00678 }
00679
00680 return result;
00681 }
00682
00683 static void HandleString(char *str, bool master)
00684 {
00685 if (*str == '#') {
00686 if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2);
00687 return;
00688 }
00689
00690
00691 if (*str == ';' || *str == ' ' || *str == '\0') return;
00692
00693 char *s = strchr(str, ':');
00694 if (s == NULL) {
00695 strgen_error("Line has no ':' delimiter");
00696 return;
00697 }
00698
00699 char *t;
00700
00701
00702 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00703 *t = 0;
00704 s++;
00705
00706
00707 const char *tmp;
00708 for (tmp = s; *tmp != '\0';) {
00709 size_t len = Utf8Validate(tmp);
00710 if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
00711 tmp += len;
00712 }
00713
00714
00715
00716 char *casep = strchr(str, '.');
00717 if (casep) *casep++ = '\0';
00718
00719
00720 LangString *ent = HashFind(str);
00721
00722 if (master) {
00723 if (ent != NULL && casep == NULL) {
00724 strgen_error("String name '%s' is used multiple times", str);
00725 return;
00726 }
00727
00728 if (ent == NULL && casep != NULL) {
00729 strgen_error("Base string name '%s' doesn't exist yet. Define it before defining a case.", str);
00730 return;
00731 }
00732
00733 if (ent == NULL) {
00734 if (_strings[_next_string_id]) {
00735 strgen_error("String ID 0x%X for '%s' already in use by '%s'", _next_string_id, str, _strings[_next_string_id]->name);
00736 return;
00737 }
00738
00739
00740 ent = CallocT<LangString>(1);
00741 _strings[_next_string_id] = ent;
00742 ent->index = _next_string_id++;
00743 ent->name = strdup(str);
00744 ent->line = _cur_line;
00745
00746 HashAdd(str, ent);
00747 }
00748
00749 if (casep != NULL) {
00750 Case *c = MallocT<Case>(1);
00751
00752 c->caseidx = ResolveCaseName(casep, strlen(casep));
00753 c->string = strdup(s);
00754 c->next = ent->english_case;
00755 ent->english_case = c;
00756 } else {
00757 ent->english = strdup(s);
00758 }
00759
00760 } else {
00761 if (ent == NULL) {
00762 strgen_warning("String name '%s' does not exist in master file", str);
00763 return;
00764 }
00765
00766 if (ent->translated && casep == NULL) {
00767 strgen_error("String name '%s' is used multiple times", str);
00768 return;
00769 }
00770
00771 if (s[0] == ':' && s[1] == '\0' && casep == NULL) {
00772
00773 ent->translated = strdup(ent->english);
00774 } else {
00775
00776 if (!CheckCommandsMatch(s, ent->english, str)) return;
00777
00778 if (casep != NULL) {
00779 Case *c = MallocT<Case>(1);
00780
00781 c->caseidx = ResolveCaseName(casep, strlen(casep));
00782 c->string = strdup(s);
00783 c->next = ent->translated_case;
00784 ent->translated_case = c;
00785 } else {
00786 ent->translated = strdup(s);
00787
00788
00789
00790 ent->line = _cur_line;
00791 }
00792 }
00793 }
00794 }
00795
00796
/**
 * Remove trailing carriage returns, newlines and spaces, in place.
 * @param buf The string to strip.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end > buf && (end[-1] == '\r' || end[-1] == '\n' || end[-1] == ' ')) {
		end--;
	}

	*end = '\0';
}
00803
00804
00805 static void ParseFile(const char *file, bool english)
00806 {
00807 FILE *in;
00808 char buf[2048];
00809
00810
00811 const char *cur_file = strrchr(_file, PATHSEPCHAR);
00812 const char *next_file = strrchr(file, PATHSEPCHAR);
00813 _translation = next_file != NULL && cur_file != NULL && strcmp(cur_file, next_file) != 0;
00814 _file = file;
00815
00816
00817 _numgenders = 0;
00818 _lang_name[0] = _lang_ownname[0] = _lang_isocode[0] = '\0';
00819 strecpy(_lang_digit_group_separator, ",", lastof(_lang_digit_group_separator));
00820 strecpy(_lang_digit_group_separator_currency, ",", lastof(_lang_digit_group_separator_currency));
00821 strecpy(_lang_digit_decimal_separator, ".", lastof(_lang_digit_decimal_separator));
00822 _lang_textdir = TD_LTR;
00823 _lang_winlangid = 0x0000;
00824 _lang_newgrflangid = 0;
00825
00826
00827
00828 in = fopen(file, "r");
00829 if (in == NULL) error("Cannot open file");
00830 _cur_line = 1;
00831 while (fgets(buf, sizeof(buf), in) != NULL) {
00832 rstrip(buf);
00833 HandleString(buf, english);
00834 _cur_line++;
00835 }
00836 fclose(in);
00837
00838 if (StrEmpty(_lang_name) || StrEmpty(_lang_ownname) || StrEmpty(_lang_isocode)) {
00839 error("Language must include ##name, ##ownname and ##isocode");
00840 }
00841 }
00842
00843
00844 static uint32 MyHashStr(uint32 hash, const char *s)
00845 {
00846 for (; *s != '\0'; s++) {
00847 hash = ROL(hash, 3) ^ *s;
00848 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00849 }
00850 return hash;
00851 }
00852
00853
00854
00855 static void MakeHashOfStrings()
00856 {
00857 uint32 hash = 0;
00858 uint i;
00859
00860 for (i = 0; i != lengthof(_strings); i++) {
00861 const LangString *ls = _strings[i];
00862
00863 if (ls != NULL) {
00864 const CmdStruct *cs;
00865 const char *s;
00866 char buf[256];
00867 int argno;
00868 int casei;
00869
00870 s = ls->name;
00871 hash ^= i * 0x717239;
00872 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00873 hash = MyHashStr(hash, s + 1);
00874
00875 s = ls->english;
00876 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00877 if (cs->flags & C_DONTCOUNT) continue;
00878
00879 hash ^= (cs - _cmd_structs) * 0x1234567;
00880 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00881 }
00882 }
00883 }
00884 _hash = hash;
00885 }
00886
00887
00888 static uint CountInUse(uint grp)
00889 {
00890 int i;
00891
00892 for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
00893 return i + 1;
00894 }
00895
00896
00897 bool CompareFiles(const char *n1, const char *n2)
00898 {
00899 FILE *f2 = fopen(n2, "rb");
00900 if (f2 == NULL) return false;
00901
00902 FILE *f1 = fopen(n1, "rb");
00903 if (f1 == NULL) error("can't open %s", n1);
00904
00905 size_t l1, l2;
00906 do {
00907 char b1[4096];
00908 char b2[4096];
00909 l1 = fread(b1, 1, sizeof(b1), f1);
00910 l2 = fread(b2, 1, sizeof(b2), f2);
00911
00912 if (l1 != l2 || memcmp(b1, b2, l1)) {
00913 fclose(f2);
00914 fclose(f1);
00915 return false;
00916 }
00917 } while (l1);
00918
00919 fclose(f2);
00920 fclose(f1);
00921 return true;
00922 }
00923
00924
00925 static void WriteStringsH(const char *filename)
00926 {
00927 int next = -1;
00928
00929 _output_filename = "tmp.xxx";
00930 _output_file = fopen(_output_filename, "w");
00931 if (_output_file == NULL) error("can't open tmp.xxx");
00932
00933 fprintf(_output_file, "/* This file is automatically generated. Do not modify */\n\n");
00934 fprintf(_output_file, "#ifndef TABLE_STRINGS_H\n");
00935 fprintf(_output_file, "#define TABLE_STRINGS_H\n");
00936
00937 for (int i = 0; i != lengthof(_strings); i++) {
00938 if (_strings[i] != NULL) {
00939 if (next != i) fprintf(_output_file, "\n");
00940 fprintf(_output_file, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
00941 next = i + 1;
00942 }
00943 }
00944
00945 fprintf(_output_file, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n", next - 1);
00946
00947 fprintf(_output_file,
00948 "\nenum {\n"
00949 "\tLANGUAGE_PACK_IDENT = 0x474E414C, // Big Endian value for 'LANG' (LE is 0x 4C 41 4E 47)\n"
00950 "\tLANGUAGE_PACK_VERSION = 0x%X,\n"
00951 "};\n", (uint)_hash
00952 );
00953
00954 fprintf(_output_file, "\n#endif /* TABLE_STRINGS_H */\n");
00955
00956 fclose(_output_file);
00957 _output_file = NULL;
00958
00959 if (CompareFiles(_output_filename, filename)) {
00960
00961 unlink(_output_filename);
00962 } else {
00963
00964 #if defined(WIN32) || defined(WIN64)
00965 unlink(filename);
00966 #endif
00967 if (rename(_output_filename, filename) == -1) error("rename() failed");
00968 }
00969 _output_filename = NULL;
00970 }
00971
00972 static int TranslateArgumentIdx(int argidx, int offset)
00973 {
00974 int sum;
00975
00976 if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00977 error("invalid argidx %d", argidx);
00978 }
00979 const CmdStruct *cs = _cur_pcs.cmd[argidx];
00980 if (cs != NULL && cs->consumes <= offset) {
00981 error("invalid argidx offset %d:%d", argidx, offset);
00982 }
00983
00984 if (_cur_pcs.cmd[argidx] == NULL) {
00985 error("no command for this argidx %d", argidx);
00986 }
00987
00988 for (int i = sum = 0; i < argidx; i++) {
00989 const CmdStruct *cs = _cur_pcs.cmd[i];
00990
00991 sum += (cs != NULL) ? cs->consumes : 1;
00992 }
00993
00994 return sum + offset;
00995 }
00996
00997 static void PutArgidxCommand()
00998 {
00999 PutUtf8(SCC_ARG_INDEX);
01000 PutByte(TranslateArgumentIdx(_cur_argidx));
01001 }
01002
01003
01004 static void PutCommandString(const char *str)
01005 {
01006 _cur_argidx = 0;
01007
01008 while (*str != '\0') {
01009
01010 if (*str != '{') {
01011 PutByte(*str++);
01012 continue;
01013 }
01014
01015 char param[256];
01016 int argno;
01017 int casei;
01018 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
01019 if (cs == NULL) break;
01020
01021 if (casei != -1) {
01022 PutUtf8(SCC_SETCASE);
01023 PutByte(casei);
01024 }
01025
01026
01027 if (cs->consumes > 0) {
01028
01029 if (argno != -1 && argno != _cur_argidx) {
01030 _cur_argidx = argno;
01031 PutArgidxCommand();
01032 }
01033
01034
01035 cs = _cur_pcs.cmd[_cur_argidx++];
01036 if (cs == NULL) {
01037 error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01038 }
01039 }
01040
01041 cs->proc(param, cs->value);
01042 }
01043 }
01044
01045 static void WriteLength(FILE *f, uint length)
01046 {
01047 if (length < 0xC0) {
01048 fputc(length, f);
01049 } else if (length < 0x4000) {
01050 fputc((length >> 8) | 0xC0, f);
01051 fputc(length & 0xFF, f);
01052 } else {
01053 error("string too long");
01054 }
01055 }
01056
01057
01058 static void WriteLangfile(const char *filename)
01059 {
01060 uint in_use[32];
01061 LanguagePackHeader hdr;
01062
01063 _output_filename = filename;
01064 _output_file = fopen(filename, "wb");
01065 if (_output_file == NULL) error("can't open %s", filename);
01066
01067 memset(&hdr, 0, sizeof(hdr));
01068 for (int i = 0; i != 32; i++) {
01069 uint n = CountInUse(i);
01070
01071 in_use[i] = n;
01072 hdr.offsets[i] = TO_LE16(n);
01073 }
01074
01075
01076 hdr.ident = TO_LE32(0x474E414C);
01077 hdr.version = TO_LE32(_hash);
01078 hdr.plural_form = _lang_pluralform;
01079 hdr.text_dir = _lang_textdir;
01080 hdr.winlangid = TO_LE16(_lang_winlangid);
01081 hdr.newgrflangid = _lang_newgrflangid;
01082 strecpy(hdr.name, _lang_name, lastof(hdr.name));
01083 strecpy(hdr.own_name, _lang_ownname, lastof(hdr.own_name));
01084 strecpy(hdr.isocode, _lang_isocode, lastof(hdr.isocode));
01085 strecpy(hdr.digit_group_separator, _lang_digit_group_separator, lastof(hdr.digit_group_separator));
01086 strecpy(hdr.digit_group_separator_currency, _lang_digit_group_separator_currency, lastof(hdr.digit_group_separator_currency));
01087 strecpy(hdr.digit_decimal_separator, _lang_digit_decimal_separator, lastof(hdr.digit_decimal_separator));
01088
01089 fwrite(&hdr, sizeof(hdr), 1, _output_file);
01090
01091 for (int i = 0; i != 32; i++) {
01092 for (uint j = 0; j != in_use[i]; j++) {
01093 const LangString *ls = _strings[(i << 11) + j];
01094 const Case *casep;
01095 const char *cmdp;
01096
01097
01098 if (ls == NULL) {
01099 WriteLength(_output_file, 0);
01100 continue;
01101 }
01102
01103 _cur_ident = ls->name;
01104 _cur_line = ls->line;
01105
01106
01107 if (_show_todo > 0 && ls->translated == NULL) {
01108 if ((_show_todo & 2) != 0) {
01109 strgen_warning("'%s' is untranslated", ls->name);
01110 }
01111 if ((_show_todo & 1) != 0) {
01112 const char *s = "<TODO> ";
01113 while (*s != '\0') PutByte(*s++);
01114 }
01115 }
01116
01117
01118 ExtractCommandString(&_cur_pcs, ls->english, false);
01119
01120 if (ls->translated_case != NULL || ls->translated != NULL) {
01121 casep = ls->translated_case;
01122 cmdp = ls->translated;
01123 } else {
01124 casep = ls->english_case;
01125 cmdp = ls->english;
01126 }
01127
01128 _translated = _masterlang || (cmdp != ls->english);
01129
01130 if (casep != NULL) {
01131 const Case *c;
01132 uint num;
01133
01134
01135
01136
01137
01138 PutUtf8(SCC_SWITCH_CASE);
01139
01140 for (num = 0, c = casep; c; c = c->next) num++;
01141 PutByte(num);
01142
01143
01144 for (c = casep; c != NULL; c = c->next) {
01145 int pos;
01146
01147 PutByte(c->caseidx);
01148
01149 pos = _put_pos;
01150 PutByte(0);
01151 PutByte(0);
01152
01153 PutCommandString(c->string);
01154 PutByte(0);
01155
01156 _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01157 _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01158 }
01159 }
01160
01161 if (cmdp != NULL) PutCommandString(cmdp);
01162
01163 WriteLength(_output_file, _put_pos);
01164 fwrite(_put_buf, 1, _put_pos, _output_file);
01165 _put_pos = 0;
01166 }
01167 }
01168
01169 fputc(0, _output_file);
01170 fclose(_output_file);
01171
01172 _output_file = NULL;
01173 _output_filename = NULL;
01174 }
01175
01177 static inline void ottd_mkdir(const char *directory)
01178 {
01179 #if defined(WIN32) || defined(__WATCOMC__)
01180 mkdir(directory);
01181 #else
01182 mkdir(directory, 0755);
01183 #endif
01184 }
01185
01189 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01190 {
01191 ttd_strlcpy(buf, path, buflen);
01192
01193 char *p = strchr(buf, '\0');
01194 if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01195 ttd_strlcpy(p, file, buflen - (size_t)(p - buf));
01196 return buf;
01197 }
01198
#if defined(__MINGW32__)
/**
 * Replace every '/' of a path by '\\', in place.
 * @param s The path to convert.
 * @return s
 */
static inline char *replace_pathsep(char *s)
{
	char *c = s;

	while (*c != '\0') {
		if (*c == '/') *c = '\\';
		c++;
	}

	return s;
}
#else
/**
 * Nothing to convert in this configuration.
 * @param s The path.
 * @return s
 */
static inline char *replace_pathsep(char *s)
{
	return s;
}
#endif
01213
01214 int CDECL main(int argc, char *argv[])
01215 {
01216 char pathbuf[MAX_PATH];
01217 const char *src_dir = ".";
01218 const char *dest_dir = NULL;
01219
01220 while (argc > 1 && *argv[1] == '-') {
01221 if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
01222 puts("$Revision: 19114 $");
01223 return 0;
01224 }
01225
01226 if (strcmp(argv[1], "-export-commands") == 0) {
01227 printf("args\tflags\tcommand\treplacement\n");
01228 for (const CmdStruct *cs = _cmd_structs; cs < endof(_cmd_structs); cs++) {
01229 char flags;
01230 switch (cs->value) {
01231 case 0x200E: case 0x200F:
01232 case 0x202A: case 0x202B: case 0x202C: case 0x202D: case 0x202E:
01233 case 0xA0:
01234 case '\n':
01235 case '{':
01236
01237 flags = 'i';
01238 break;
01239
01240 default:
01241 if (cs->proc == EmitGender) {
01242 flags = 'g';
01243 } else if (cs->proc == EmitPlural) {
01244 flags = 'p';
01245 } else {
01246 flags = '0';
01247 }
01248 }
01249 printf("%i\t%c\t\"%s\"\t\"%s\"\n", cs->consumes, flags, cs->cmd, strstr(cs->cmd, "STRING") ? "STRING" : cs->cmd);
01250 }
01251 return 0;
01252 }
01253
01254 if (strcmp(argv[1], "-export-plurals") == 0) {
01255 printf("count\tdescription\n");
01256 for (const PluralForm *pf = _plural_forms; pf < endof(_plural_forms); pf++) {
01257 printf("%i\t\"%s\"\n", pf->plural_count, pf->description);
01258 }
01259 return 0;
01260 }
01261
01262 if (strcmp(argv[1], "-export-pragmas") == 0) {
01263 printf("name\tflags\tdefault\tdescription\n");
01264 for (size_t i = 0; i < lengthof(_pragmas); i++) {
01265 printf("\"%s\"\t%s\t\"%s\"\t\"%s\"\n",
01266 _pragmas[i][0], _pragmas[i][1], _pragmas[i][2], _pragmas[i][3]);
01267 }
01268 return 0;
01269 }
01270
01271 if (strcmp(argv[1], "-t") == 0 || strcmp(argv[1], "--todo") == 0) {
01272 _show_todo |= 1;
01273 argc--, argv++;
01274 continue;
01275 }
01276
01277 if (strcmp(argv[1], "-w") == 0 || strcmp(argv[1], "--warning") == 0) {
01278 _show_todo |= 2;
01279 argc--, argv++;
01280 continue;
01281 }
01282
01283 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
01284 puts(
01285 "strgen - $Revision: 19114 $\n"
01286 " -v | --version print version information and exit\n"
01287 " -t | --todo replace any untranslated strings with '<TODO>'\n"
01288 " -w | --warning print a warning for any untranslated strings\n"
01289 " -h | -? | --help print this help message and exit\n"
01290 " -s | --source_dir search for english.txt in the specified directory\n"
01291 " -d | --dest_dir put output file in the specified directory, create if needed\n"
01292 " -export-commands export all commands and exit\n"
01293 " -export-plurals export all plural forms and exit\n"
01294 " -export-pragmas export all pragmas and exit\n"
01295 " Run without parameters and strgen will search for english.txt and parse it,\n"
01296 " creating strings.h. Passing an argument, strgen will translate that language\n"
01297 " file using english.txt as a reference and output <language>.lng."
01298 );
01299 return 0;
01300 }
01301
01302 if (argc > 2 && (strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source_dir") == 0)) {
01303 src_dir = replace_pathsep(argv[2]);
01304 argc -= 2, argv += 2;
01305 continue;
01306 }
01307
01308 if (argc > 2 && (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--dest_dir") == 0)) {
01309 dest_dir = replace_pathsep(argv[2]);
01310 argc -= 2, argv += 2;
01311 continue;
01312 }
01313
01314 fprintf(stderr, "Invalid arguments\n");
01315 return 0;
01316 }
01317
01318 if (dest_dir == NULL) dest_dir = src_dir;
01319
01320
01321
01322
01323
01324 if (argc == 1) {
01325 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01326
01327
01328 _masterlang = true;
01329 ParseFile(pathbuf, true);
01330 MakeHashOfStrings();
01331 if (_errors) return 1;
01332
01333
01334 ottd_mkdir(dest_dir);
01335 mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01336 WriteStringsH(pathbuf);
01337 } else if (argc == 2) {
01338 char *r;
01339
01340 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01341
01342
01343 _masterlang = false;
01344 ParseFile(pathbuf, true);
01345 MakeHashOfStrings();
01346 ParseFile(replace_pathsep(argv[1]), false);
01347 if (_errors) return 1;
01348
01349
01350 r = strrchr(argv[1], PATHSEPCHAR);
01351 mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : argv[1]);
01352
01353
01354 r = strrchr(pathbuf, '.');
01355 if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01356 ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01357 WriteLangfile(pathbuf);
01358
01359
01360 if ((_show_todo & 2) != 0) {
01361 fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01362 }
01363 } else {
01364 fprintf(stderr, "Invalid arguments\n");
01365 }
01366
01367 return 0;
01368 }