summaryrefslogtreecommitdiffstats
path: root/src/iso5426converter.cpp
blob: cb0eca4c54ed465f2771db8df31bfa49ce468690 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
/***************************************************************************
    copyright            : (C) 2006 by Robby Stephenson
    email                : [email protected]
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This file has been modified to match the requirements of KBibTeX.     *
 *   In case of problems or bugs arising from this implementation, please  *
 *   contact the KBibTeX team first.                                       *
 *                             Thomas Fischer <[email protected]>  *
 *                                                                         *
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

// This class is adapted from Iso5426ToUnicode from the MARC4J project, available
// from http://marc4j.tigris.org, with the following notice:
// * Copyright (C) 2002 Bas  Peters  ([email protected])
// * Copyright (C) 2002 Yves Pratter ([email protected])
//
// That source was released under the terms of the GNU Lesser General Public
// License, version 2.1. In accordance with Condition 3 of that license,
// I am applying the terms of the GNU General Public License to the source
// code, and including a large portion of it here

#include "iso5426converter.h"
#include <kdebug.h>

#include <tqstring.h>

using KBibTeX::Iso5426Converter;

TQString Iso5426Converter::toUtf8(const TQCString& text_) {
  const uint len = text_.length();
  TQString result;
  result.reserve(len);
  uint pos = 0;
  for(uint i = 0; i < len; ++i) {
    uchar c = text_[i];
    if(isAscii(c)) {
      result[pos++] = c;
    } else if(isCombining(c) && hasNext(i, len)) {
      // this is a hack
      // use the diaeresis instead of umlaut
      // works for SUDOC
      if(c == 0xC9) {
        c = 0xC8;
      }
      TQChar d = getCombiningChar(c * 256 + text_[i + 1]);
      if(!d.isNull()) {
        result[pos++] = d;
        ++i;
      } else {
        result[pos++] = getChar(c);
      }
    } else {
      result[pos++] = getChar(c);
    }
  }
  result.squeeze();
  return result;
}

inline
bool Iso5426Converter::hasNext(uint pos, uint len) {
  return pos < (len - 1);
}

inline
bool Iso5426Converter::isAscii(uchar c) {
  return c <= 0x7F;
}

inline
bool Iso5426Converter::isCombining(uchar c) {
  return c >= 0xC0 && c <= 0xDF;
}

// Source : http://www.itscj.ipsj.or.jp/ISO-IR/053.pdf
TQChar Iso5426Converter::getChar(uchar c) {
  switch(c) {
  case 0xA1:
    return 0x00A1; // 2/1 inverted exclamation mark
  case 0xA2:
    return 0x201C; // 2/2 left low double quotation mark
  case 0xA3:
    return 0x00A3; // 2/3 pound sign
  case 0xA4:
    return 0x0024; // 2/4 dollar sign
  case 0xA5:
    return 0x00A5; // 2/5 yen sign
  case 0xA6:
    return 0x2020; // 2/6 single dagger
  case 0xA7:
    return 0x00A7; // 2/7 paragraph (section)
  case 0xA8:
    return 0x2032; // 2/8 prime
  case 0xA9:
    return 0x2018; // 2/9 left high single quotation mark
  case 0xAA:
    return 0x201C; // 2/10 left high double quotation mark
  case 0xAB:
    return 0x00AB; // 2/11 left angle quotation mark
  case 0xAC:
    return 0x266D; // 2/12 music flat
  case 0xAD:
    return 0x00A9; // 2/13 copyright sign
  case 0xAE:
    return 0x2117; // 2/14 sound recording copyright sign
  case 0xAF:
    return 0x00AE; // 2/15 trade mark sign

  case 0xB0:
    return 0x0639; // 3/0 ayn [ain]
  case 0xB1:
    return 0x0623; // 3/1 alif/hamzah [alef with hamza above]
  case 0xB2:
    return 0x2018; // 3/2 left low single quotation mark
  // 3/3 (this position shall not be used)
  // 3/4 (this position shall not be used)
  // 3/5 (this position shall not be used)
  case 0xB6:
    return 0x2021; // 3/6 double dagger
  case 0xB7:
    return 0x00B7; // 3/7 middle dot
  case 0xB8:
    return 0x2033; // 3/8 double prime
  case 0xB9:
    return 0x2019; // 3/9 right high single quotation mark
  case 0xBA:
    return 0x201D; // 3/10 right high double quotation mark
  case 0xBB:
    return 0x00BB; // 3/11 right angle quotation mark
  case 0xBC:
    return 0x266F; // 3/12 musical sharp
  case 0xBD:
    return 0x02B9; // 3/13 mjagkij znak
  case 0xBE:
    return 0x02BA; // 3/14 tverdyj znak
  case 0xBF:
    return 0x00BF; // 3/15 inverted question mark

  // 4/0 to 5/15 diacritic characters

  // 6/0 (this position shall not be used)
  case 0xE1:
    return 0x00C6; // 6/1 CAPITAL DIPHTHONG A WITH E
  case 0xE2:
    return 0x0110; // 6/2 CAPITAL LETTER D WITH STROKE
  // 6/3 (this position shall not be used)
  // 6/4 (this position shall not be used)
  // 6/5 (this position shall not be used)
  case 0xE6:
    return 0x0132; // 6/6 CAPITAL LETTER IJ
  // 6/7 (this position shall not be used)
  case 0xE8:
    return 0x0141; // 6/8 CAPITAL LETTER L WITH STROKE
  case 0xE9:
    return 0x00D8; // 6/9 CAPITAL LETTER O WITH SOLIDUS [oblique stroke]
  case 0xEA:
    return 0x0152; // 6/10 CAPITAL DIPHTONG OE
  // 6/11 (this position shall not be used)
  case 0xEC:
    return 0x00DE; // 6/12 CAPITAL LETTER THORN
  // 6/13 (this position shall not be used)
  // 6/14 (this position shall not be used)
  // 6/15 (this position shall not be used)

  // 7/0 (this position shall not be used)
  case 0xF1:
    return 0x00E6; // 7/1 small diphthong a with e
  // 7/4 (this position shall not be used)
  case 0xF5:
    return 0x0131; // 7/5 small letter i without dot
  case 0xF6:
    return 0x0133; // 7/6 small letter ij
  // 7/7 (this position shall not be used)
  case 0xF8:
    return 0x0142; // 7/8 small letter l with stroke
  case 0xF9:
    return 0x00F8; // 7/9 small letter o with solidus (oblique stroke)
  case 0xFA:
    return 0x0153; // 7/10 small diphtong oe
  case 0xFB:
    return 0x00DF; // 7/11 small letter sharp s
  case 0xFC:
    return 0x00FE; // 7/12 small letter thorn
  // 7/13 (this position shall not be used)
  // 7/14 (this position shall not be used)
  default:
    return c;
  }
}

TQChar Iso5426Converter::getCombiningChar(uint c) {
  switch(c) {
  // 4/0 low rising tone mark
  case 0xC041:
    return 0x1EA2; // CAPITAL A WITH HOOK ABOVE
  case 0xC045:
    return 0x1EBA; // CAPITAL E WITH HOOK ABOVE
  case 0xC049:
    return 0x1EC8; // CAPITAL I WITH HOOK ABOVE
  case 0xC04F:
    return 0x1ECE; // CAPITAL O WITH HOOK ABOVE
  case 0xC055:
    return 0x1EE6; // CAPITAL U WITH HOOK ABOVE
  case 0xC059:
    return 0x1EF6; // CAPITAL Y WITH HOOK ABOVE
  case 0xC061:
    return 0x1EA3; // small a with hook above
  case 0xC065:
    return 0x1EBB; // small e with hook above
  case 0xC069:
    return 0x1EC9; // small i with hook above
  case 0xC06F:
    return 0x1ECF; // small o with hook above
  case 0xC075:
    return 0x1EE7; // small u with hook above
  case 0xC079:
    return 0x1EF7; // small y with hook above

  // 4/1 grave accent
  case 0xC141:
    return 0x00C0; // CAPITAL A WITH GRAVE ACCENT
  case 0xC145:
    return 0x00C8; // CAPITAL E WITH GRAVE ACCENT
  case 0xC149:
    return 0x00CC; // CAPITAL I WITH GRAVE ACCENT
  case 0xC14F:
    return 0x00D2; // CAPITAL O WITH GRAVE ACCENT
  case 0xC155:
    return 0x00D9; // CAPITAL U WITH GRAVE ACCENT
  case 0xC157:
    return 0x1E80; // CAPITAL W WITH GRAVE
  case 0xC159:
    return 0x1EF2; // CAPITAL Y WITH GRAVE
  case 0xC161:
    return 0x00E0; // small a with grave accent
  case 0xC165:
    return 0x00E8; // small e with grave accent
  case 0xC169:
    return 0x00EC; // small i with grave accent
  case 0xC16F:
    return 0x00F2; // small o with grave accent
  case 0xC175:
    return 0x00F9; // small u with grave accent
  case 0xC177:
    return 0x1E81; // small w with grave
  case 0xC179:
    return 0x1EF3; // small y with grave

  // 4/2 acute accent
  case 0xC241:
    return 0x00C1; // CAPITAL A WITH ACUTE ACCENT
  case 0xC243:
    return 0x0106; // CAPITAL C WITH ACUTE ACCENT
  case 0xC245:
    return 0x00C9; // CAPITAL E WITH ACUTE ACCENT
  case 0xC247:
    return 0x01F4; // CAPITAL G WITH ACUTE
  case 0xC249:
    return 0x00CD; // CAPITAL I WITH ACUTE ACCENT
  case 0xC24B:
    return 0x1E30; // CAPITAL K WITH ACUTE
  case 0xC24C:
    return 0x0139; // CAPITAL L WITH ACUTE ACCENT
  case 0xC24D:
    return 0x1E3E; // CAPITAL M WITH ACUTE
  case 0xC24E:
    return 0x0143; // CAPITAL N WITH ACUTE ACCENT
  case 0xC24F:
    return 0x00D3; // CAPITAL O WITH ACUTE ACCENT
  case 0xC250:
    return 0x1E54; // CAPITAL P WITH ACUTE
  case 0xC252:
    return 0x0154; // CAPITAL R WITH ACUTE ACCENT
  case 0xC253:
    return 0x015A; // CAPITAL S WITH ACUTE ACCENT
  case 0xC255:
    return 0x00DA; // CAPITAL U WITH ACUTE ACCENT
  case 0xC257:
    return 0x1E82; // CAPITAL W WITH ACUTE
  case 0xC259:
    return 0x00DD; // CAPITAL Y WITH ACUTE ACCENT
  case 0xC25A:
    return 0x0179; // CAPITAL Z WITH ACUTE ACCENT
  case 0xC261:
    return 0x00E1; // small a with acute accent
  case 0xC263:
    return 0x0107; // small c with acute accent
  case 0xC265:
    return 0x00E9; // small e with acute accent
  case 0xC267:
    return 0x01F5; // small g with acute
  case 0xC269:
    return 0x00ED; // small i with acute accent
  case 0xC26B:
    return 0x1E31; // small k with acute
  case 0xC26C:
    return 0x013A; // small l with acute accent
  case 0xC26D:
    return 0x1E3F; // small m with acute
  case 0xC26E:
    return 0x0144; // small n with acute accent
  case 0xC26F:
    return 0x00F3; // small o with acute accent
  case 0xC270:
    return 0x1E55; // small p with acute
  case 0xC272:
    return 0x0155; // small r with acute accent
  case 0xC273:
    return 0x015B; // small s with acute accent
  case 0xC275:
    return 0x00FA; // small u with acute accent
  case 0xC277:
    return 0x1E83; // small w with acute
  case 0xC279:
    return 0x00FD; // small y with acute accent
  case 0xC27A:
    return 0x017A; // small z with acute accent
  case 0xC2E1:
    return 0x01FC; // CAPITAL AE WITH ACUTE
  case 0xC2F1:
    return 0x01FD; // small ae with acute

  // 4/3 circumflex accent
  case 0xC341:
    return 0x00C2; // CAPITAL A WITH CIRCUMFLEX ACCENT
  case 0xC343:
    return 0x0108; // CAPITAL C WITH CIRCUMFLEX
  case 0xC345:
    return 0x00CA; // CAPITAL E WITH CIRCUMFLEX ACCENT
  case 0xC347:
    return 0x011C; // CAPITAL G WITH CIRCUMFLEX
  case 0xC348:
    return 0x0124; // CAPITAL H WITH CIRCUMFLEX
  case 0xC349:
    return 0x00CE; // CAPITAL I WITH CIRCUMFLEX ACCENT
  case 0xC34A:
    return 0x0134; // CAPITAL J WITH CIRCUMFLEX
  case 0xC34F:
    return 0x00D4; // CAPITAL O WITH CIRCUMFLEX ACCENT
  case 0xC353:
    return 0x015C; // CAPITAL S WITH CIRCUMFLEX
  case 0xC355:
    return 0x00DB; // CAPITAL U WITH CIRCUMFLEX
  case 0xC357:
    return 0x0174; // CAPITAL W WITH CIRCUMFLEX
  case 0xC359:
    return 0x0176; // CAPITAL Y WITH CIRCUMFLEX
  case 0xC35A:
    return 0x1E90; // CAPITAL Z WITH CIRCUMFLEX
  case 0xC361:
    return 0x00E2; // small a with circumflex accent
  case 0xC363:
    return 0x0109; // small c with circumflex
  case 0xC365:
    return 0x00EA; // small e with circumflex accent
  case 0xC367:
    return 0x011D; // small g with circumflex
  case 0xC368:
    return 0x0125; // small h with circumflex
  case 0xC369:
    return 0x00EE; // small i with circumflex accent
  case 0xC36A:
    return 0x0135; // small j with circumflex
  case 0xC36F:
    return 0x00F4; // small o with circumflex accent
  case 0xC373:
    return 0x015D; // small s with circumflex
  case 0xC375:
    return 0x00FB; // small u with circumflex
  case 0xC377:
    return 0x0175; // small w with circumflex
  case 0xC379:
    return 0x0177; // small y with circumflex
  case 0xC37A:
    return 0x1E91; // small z with circumflex

  // 4/4 tilde
  case 0xC441:
    return 0x00C3; // CAPITAL A WITH TILDE
  case 0xC445:
    return 0x1EBC; // CAPITAL E WITH TILDE
  case 0xC449:
    return 0x0128; // CAPITAL I WITH TILDE
  case 0xC44E:
    return 0x00D1; // CAPITAL N WITH TILDE
  case 0xC44F:
    return 0x00D5; // CAPITAL O WITH TILDE
  case 0xC455:
    return 0x0168; // CAPITAL U WITH TILDE
  case 0xC456:
    return 0x1E7C; // CAPITAL V WITH TILDE
  case 0xC459:
    return 0x1EF8; // CAPITAL Y WITH TILDE
  case 0xC461:
    return 0x00E3; // small a with tilde
  case 0xC465:
    return 0x1EBD; // small e with tilde
  case 0xC469:
    return 0x0129; // small i with tilde
  case 0xC46E:
    return 0x00F1; // small n with tilde
  case 0xC46F:
    return 0x00F5; // small o with tilde
  case 0xC475:
    return 0x0169; // small u with tilde
  case 0xC476:
    return 0x1E7D; // small v with tilde
  case 0xC479:
    return 0x1EF9; // small y with tilde

  // 4/5 macron
  case 0xC541:
    return 0x0100; // CAPITAL A WITH MACRON
  case 0xC545:
    return 0x0112; // CAPITAL E WITH MACRON
  case 0xC547:
    return 0x1E20; // CAPITAL G WITH MACRON
  case 0xC549:
    return 0x012A; // CAPITAL I WITH MACRON
  case 0xC54F:
    return 0x014C; // CAPITAL O WITH MACRON
  case 0xC555:
    return 0x016A; // CAPITAL U WITH MACRON
  case 0xC561:
    return 0x0101; // small a with macron
  case 0xC565:
    return 0x0113; // small e with macron
  case 0xC567:
    return 0x1E21; // small g with macron
  case 0xC569:
    return 0x012B; // small i with macron
  case 0xC56F:
    return 0x014D; // small o with macron
  case 0xC575:
    return 0x016B; // small u with macron
  case 0xC5E1:
    return 0x01E2; // CAPITAL AE WITH MACRON
  case 0xC5F1:
    return 0x01E3; // small ae with macron

  // 4/6 breve
  case 0xC641:
    return 0x0102; // CAPITAL A WITH BREVE
  case 0xC645:
    return 0x0114; // CAPITAL E WITH BREVE
  case 0xC647:
    return 0x011E; // CAPITAL G WITH BREVE
  case 0xC649:
    return 0x012C; // CAPITAL I WITH BREVE
  case 0xC64F:
    return 0x014E; // CAPITAL O WITH BREVE
  case 0xC655:
    return 0x016C; // CAPITAL U WITH BREVE
  case 0xC661:
    return 0x0103; // small a with breve
  case 0xC665:
    return 0x0115; // small e with breve
  case 0xC667:
    return 0x011F; // small g with breve
  case 0xC669:
    return 0x012D; // small i with breve
  case 0xC66F:
    return 0x014F; // small o with breve
  case 0xC675:
    return 0x016D; // small u with breve

  // 4/7 dot above
  case 0xC742:
    return 0x1E02; // CAPITAL B WITH DOT ABOVE
  case 0xC743:
    return 0x010A; // CAPITAL C WITH DOT ABOVE
  case 0xC744:
    return 0x1E0A; // CAPITAL D WITH DOT ABOVE
  case 0xC745:
    return 0x0116; // CAPITAL E WITH DOT ABOVE
  case 0xC746:
    return 0x1E1E; // CAPITAL F WITH DOT ABOVE
  case 0xC747:
    return 0x0120; // CAPITAL G WITH DOT ABOVE
  case 0xC748:
    return 0x1E22; // CAPITAL H WITH DOT ABOVE
  case 0xC749:
    return 0x0130; // CAPITAL I WITH DOT ABOVE
  case 0xC74D:
    return 0x1E40; // CAPITAL M WITH DOT ABOVE
  case 0xC74E:
    return 0x1E44; // CAPITAL N WITH DOT ABOVE
  case 0xC750:
    return 0x1E56; // CAPITAL P WITH DOT ABOVE
  case 0xC752:
    return 0x1E58; // CAPITAL R WITH DOT ABOVE
  case 0xC753:
    return 0x1E60; // CAPITAL S WITH DOT ABOVE
  case 0xC754:
    return 0x1E6A; // CAPITAL T WITH DOT ABOVE
  case 0xC757:
    return 0x1E86; // CAPITAL W WITH DOT ABOVE
  case 0xC758:
    return 0x1E8A; // CAPITAL X WITH DOT ABOVE
  case 0xC759:
    return 0x1E8E; // CAPITAL Y WITH DOT ABOVE
  case 0xC75A:
    return 0x017B; // CAPITAL Z WITH DOT ABOVE
  case 0xC762:
    return 0x1E03; // small b with dot above
  case 0xC763:
    return 0x010B; // small c with dot above
  case 0xC764:
    return 0x1E0B; // small d with dot above
  case 0xC765:
    return 0x0117; // small e with dot above
  case 0xC766:
    return 0x1E1F; // small f with dot above
  case 0xC767:
    return 0x0121; // small g with dot above
  case 0xC768:
    return 0x1E23; // small h with dot above
  case 0xC76D:
    return 0x1E41; // small m with dot above
  case 0xC76E:
    return 0x1E45; // small n with dot above
  case 0xC770:
    return 0x1E57; // small p with dot above
  case 0xC772:
    return 0x1E59; // small r with dot above
  case 0xC773:
    return 0x1E61; // small s with dot above
  case 0xC774:
    return 0x1E6B; // small t with dot above
  case 0xC777:
    return 0x1E87; // small w with dot above
  case 0xC778:
    return 0x1E8B; // small x with dot above
  case 0xC779:
    return 0x1E8F; // small y with dot above
  case 0xC77A:
    return 0x017C; // small z with dot above

  // 4/8 trema, diaresis
  case 0xC820:
    return 0x00A8; // diaeresis
  case 0xC841:
    return 0x00C4; // CAPITAL A WITH DIAERESIS
  case 0xC845:
    return 0x00CB; // CAPITAL E WITH DIAERESIS
  case 0xC848:
    return 0x1E26; // CAPITAL H WITH DIAERESIS
  case 0xC849:
    return 0x00CF; // CAPITAL I WITH DIAERESIS
  case 0xC84F:
    return 0x00D6; // CAPITAL O WITH DIAERESIS
  case 0xC855:
    return 0x00DC; // CAPITAL U WITH DIAERESIS
  case 0xC857:
    return 0x1E84; // CAPITAL W WITH DIAERESIS
  case 0xC858:
    return 0x1E8C; // CAPITAL X WITH DIAERESIS
  case 0xC859:
    return 0x0178; // CAPITAL Y WITH DIAERESIS
  case 0xC861:
    return 0x00E4; // small a with diaeresis
  case 0xC865:
    return 0x00EB; // small e with diaeresis
  case 0xC868:
    return 0x1E27; // small h with diaeresis
  case 0xC869:
    return 0x00EF; // small i with diaeresis
  case 0xC86F:
    return 0x00F6; // small o with diaeresis
  case 0xC874:
    return 0x1E97; // small t with diaeresis
  case 0xC875:
    return 0x00FC; // small u with diaeresis
  case 0xC877:
    return 0x1E85; // small w with diaeresis
  case 0xC878:
    return 0x1E8D; // small x with diaeresis
  case 0xC879:
    return 0x00FF; // small y with diaeresis

  // 4/9 umlaut
  case 0xC920:
    return 0x00A8; // [diaeresis]

  // 4/10 circle above
  case 0xCA41:
    return 0x00C5; // CAPITAL A WITH RING ABOVE
  case 0xCAAD:
    return 0x016E; // CAPITAL U WITH RING ABOVE
  case 0xCA61:
    return 0x00E5; // small a with ring above
  case 0xCA75:
    return 0x016F; // small u with ring above
  case 0xCA77:
    return 0x1E98; // small w with ring above
  case 0xCA79:
    return 0x1E99; // small y with ring above

  // 4/11 high comma off centre

  // 4/12 inverted high comma centred

  // 4/13 double acute accent
  case 0xCD4F:
    return 0x0150; // CAPITAL O WITH DOUBLE ACUTE
  case 0xCD55:
    return 0x0170; // CAPITAL U WITH DOUBLE ACUTE
  case 0xCD6F:
    return 0x0151; // small o with double acute
  case 0xCD75:
    return 0x0171; // small u with double acute

  // 4/14 horn
  case 0xCE54:
    return 0x01A0; // LATIN CAPITAL LETTER O WITH HORN
  case 0xCE55:
    return 0x01AF; // LATIN CAPITAL LETTER U WITH HORN
  case 0xCE74:
    return 0x01A1; // latin small letter o with horn
  case 0xCE75:
    return 0x01B0; // latin small letter u with horn

  // 4/15 caron (hacek)
  case 0xCF41:
    return 0x01CD; // CAPITAL A WITH CARON
  case 0xCF43:
    return 0x010C; // CAPITAL C WITH CARON
  case 0xCF44:
    return 0x010E; // CAPITAL D WITH CARON
  case 0xCF45:
    return 0x011A; // CAPITAL E WITH CARON
  case 0xCF47:
    return 0x01E6; // CAPITAL G WITH CARON
  case 0xCF49:
    return 0x01CF; // CAPITAL I WITH CARON
  case 0xCF4B:
    return 0x01E8; // CAPITAL K WITH CARON
  case 0xCF4C:
    return 0x013D; // CAPITAL L WITH CARON
  case 0xCF4E:
    return 0x0147; // CAPITAL N WITH CARON
  case 0xCF4F:
    return 0x01D1; // CAPITAL O WITH CARON
  case 0xCF52:
    return 0x0158; // CAPITAL R WITH CARON
  case 0xCF53:
    return 0x0160; // CAPITAL S WITH CARON
  case 0xCF54:
    return 0x0164; // CAPITAL T WITH CARON
  case 0xCF55:
    return 0x01D3; // CAPITAL U WITH CARON
  case 0xCF5A:
    return 0x017D; // CAPITAL Z WITH CARON
  case 0xCF61:
    return 0x01CE; // small a with caron
  case 0xCF63:
    return 0x010D; // small c with caron
  case 0xCF64:
    return 0x010F; // small d with caron
  case 0xCF65:
    return 0x011B; // small e with caron
  case 0xCF67:
    return 0x01E7; // small g with caron
  case 0xCF69:
    return 0x01D0; // small i with caron
  case 0xCF6A:
    return 0x01F0; // small j with caron
  case 0xCF6B:
    return 0x01E9; // small k with caron
  case 0xCF6C:
    return 0x013E; // small l with caron
  case 0xCF6E:
    return 0x0148; // small n with caron
  case 0xCF6F:
    return 0x01D2; // small o with caron
  case 0xCF72:
    return 0x0159; // small r with caron
  case 0xCF73:
    return 0x0161; // small s with caron
  case 0xCF74:
    return 0x0165; // small t with caron
  case 0xCF75:
    return 0x01D4; // small u with caron
  case 0xCF7A:
    return 0x017E; // small z with caron

  // 5/0 cedilla
  case 0xD020:
    return 0x00B8; // cedilla
  case 0xD043:
    return 0x00C7; // CAPITAL C WITH CEDILLA
  case 0xD044:
    return 0x1E10; // CAPITAL D WITH CEDILLA
  case 0xD047:
    return 0x0122; // CAPITAL G WITH CEDILLA
  case 0xD048:
    return 0x1E28; // CAPITAL H WITH CEDILLA
  case 0xD04B:
    return 0x0136; // CAPITAL K WITH CEDILLA
  case 0xD04C:
    return 0x013B; // CAPITAL L WITH CEDILLA
  case 0xD04E:
    return 0x0145; // CAPITAL N WITH CEDILLA
  case 0xD052:
    return 0x0156; // CAPITAL R WITH CEDILLA
  case 0xD053:
    return 0x015E; // CAPITAL S WITH CEDILLA
  case 0xD054:
    return 0x0162; // CAPITAL T WITH CEDILLA
  case 0xD063:
    return 0x00E7; // small c with cedilla
  case 0xD064:
    return 0x1E11; // small d with cedilla
  case 0xD067:
    return 0x0123; // small g with cedilla
  case 0xD068:
    return 0x1E29; // small h with cedilla
  case 0xD06B:
    return 0x0137; // small k with cedilla
  case 0xD06C:
    return 0x013C; // small l with cedilla
  case 0xD06E:
    return 0x0146; // small n with cedilla
  case 0xD072:
    return 0x0157; // small r with cedilla
  case 0xD073:
    return 0x015F; // small s with cedilla
  case 0xD074:
    return 0x0163; // small t with cedilla

  // 5/1 rude

  // 5/2 hook to left

  // 5/3 ogonek (hook to right)
  case 0xD320:
    return 0x02DB; // ogonek
  case 0xD341:
    return 0x0104; // CAPITAL A WITH OGONEK
  case 0xD345:
    return 0x0118; // CAPITAL E WITH OGONEK
  case 0xD349:
    return 0x012E; // CAPITAL I WITH OGONEK
  case 0xD34F:
    return 0x01EA; // CAPITAL O WITH OGONEK
  case 0xD355:
    return 0x0172; // CAPITAL U WITH OGONEK
  case 0xD361:
    return 0x0105; // small a with ogonek
  case 0xD365:
    return 0x0119; // small e with ogonek
  case 0xD369:
    return 0x012F; // small i with ogonek
  case 0xD36F:
    return 0x01EB; // small o with ogonek
  case 0xD375:
    return 0x0173; // small u with ogonek

  // 5/4 circle below
  case 0xD441:
    return 0x1E00; // CAPITAL A WITH RING BELOW
  case 0xD461:
    return 0x1E01; // small a with ring below

  // 5/5 half circle below
  case 0xF948:
    return 0x1E2A; // CAPITAL H WITH BREVE BELOW
  case 0xF968:
    return 0x1E2B; // small h with breve below

  // 5/6 dot below
  case 0xD641:
    return 0x1EA0; // CAPITAL A WITH DOT BELOW
  case 0xD642:
    return 0x1E04; // CAPITAL B WITH DOT BELOW
  case 0xD644:
    return 0x1E0C; // CAPITAL D WITH DOT BELOW
  case 0xD645:
    return 0x1EB8; // CAPITAL E WITH DOT BELOW
  case 0xD648:
    return 0x1E24; // CAPITAL H WITH DOT BELOW
  case 0xD649:
    return 0x1ECA; // CAPITAL I WITH DOT BELOW
  case 0xD64B:
    return 0x1E32; // CAPITAL K WITH DOT BELOW
  case 0xD64C:
    return 0x1E36; // CAPITAL L WITH DOT BELOW
  case 0xD64D:
    return 0x1E42; // CAPITAL M WITH DOT BELOW
  case 0xD64E:
    return 0x1E46; // CAPITAL N WITH DOT BELOW
  case 0xD64F:
    return 0x1ECC; // CAPITAL O WITH DOT BELOW
  case 0xD652:
    return 0x1E5A; // CAPITAL R WITH DOT BELOW
  case 0xD653:
    return 0x1E62; // CAPITAL S WITH DOT BELOW
  case 0xD654:
    return 0x1E6C; // CAPITAL T WITH DOT BELOW
  case 0xD655:
    return 0x1EE4; // CAPITAL U WITH DOT BELOW
  case 0xD656:
    return 0x1E7E; // CAPITAL V WITH DOT BELOW
  case 0xD657:
    return 0x1E88; // CAPITAL W WITH DOT BELOW
  case 0xD659:
    return 0x1EF4; // CAPITAL Y WITH DOT BELOW
  case 0xD65A:
    return 0x1E92; // CAPITAL Z WITH DOT BELOW
  case 0xD661:
    return 0x1EA1; // small a with dot below
  case 0xD662:
    return 0x1E05; // small b with dot below
  case 0xD664:
    return 0x1E0D; // small d with dot below
  case 0xD665:
    return 0x1EB9; // small e with dot below
  case 0xD668:
    return 0x1E25; // small h with dot below
  case 0xD669:
    return 0x1ECB; // small i with dot below
  case 0xD66B:
    return 0x1E33; // small k with dot below
  case 0xD66C:
    return 0x1E37; // small l with dot below
  case 0xD66D:
    return 0x1E43; // small m with dot below
  case 0xD66E:
    return 0x1E47; // small n with dot below
  case 0xD66F:
    return 0x1ECD; // small o with dot below
  case 0xD672:
    return 0x1E5B; // small r with dot below
  case 0xD673:
    return 0x1E63; // small s with dot below
  case 0xD674:
    return 0x1E6D; // small t with dot below
  case 0xD675:
    return 0x1EE5; // small u with dot below
  case 0xD676:
    return 0x1E7F; // small v with dot below
  case 0xD677:
    return 0x1E89; // small w with dot below
  case 0xD679:
    return 0x1EF5; // small y with dot below
  case 0xD67A:
    return 0x1E93; // small z with dot below

  // 5/7 double dot below
  case 0xD755:
    return 0x1E72; // CAPITAL U WITH DIAERESIS BELOW
  case 0xD775:
    return 0x1E73; // small u with diaeresis below

  // 5/8 underline
  case 0xD820:
    return 0x005F; // underline

  // 5/9 double underline
  case 0xD920:
    return 0x2017; // double underline

  // 5/10 small low vertical bar
  case 0xDA20:
    return 0x02CC; //

  // 5/11 circumflex below

  // 5/12 (this position shall not be used)

  // 5/13 left half of ligature sign and of double tilde

  // 5/14 right half of ligature sign

  // 5/15 right half of double tilde

  default:
    kdDebug() << "Iso5426Converter::getCombiningChar() - no match for " << c << endl;
    return TQChar();
  }
}