You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

DecodedBitStreamParser.cs 19 kB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /*
  2. * Copyright 2007 ZXing authors
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. using System;
  17. using System.Collections.Generic;
  18. using System.Text;
  19. using ZXing.Common;
  20. namespace ZXing.QrCode.Internal
  21. {
  22. /// <summary> <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
  23. /// in one QR Code. This class decodes the bits back into text.</p>
  24. ///
  25. /// <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
  26. /// <author>Sean Owen</author>
  27. /// </summary>
  28. internal static class DecodedBitStreamParser
  29. {
  /// <summary>
  /// Lookup table for the 45-symbol alphanumeric-mode alphabet; the array index is the
  /// encoded value. See ISO 18004:2006, 6.4.4 Table 5.
  /// </summary>
  private static readonly char[] ALPHANUMERIC_CHARS =
      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:".ToCharArray();

  /// <summary>
  /// Subset indicator value that is compared against the 4 bits read right after a
  /// HANZI mode indicator.
  /// </summary>
  private const int GB2312_SUBSET = 1;
  /// <summary>
  /// Decodes the data bits of a QR Code symbol, segment by segment, into text.
  /// </summary>
  /// <param name="bytes">The raw data bytes; read as a bit stream and also stored in the result.</param>
  /// <param name="version">The symbol version, used to determine the width of each character count field.</param>
  /// <param name="ecLevel">The error correction level; its string form is stored in the result. May be null.</param>
  /// <param name="hints">Decoding hints, forwarded to the byte-segment decoder for encoding detection.</param>
  /// <returns>
  /// The decoded result, or <c>null</c> if the bit stream is malformed (unknown mode,
  /// truncated segment, invalid segment content).
  /// </returns>
  internal static DecoderResult decode(byte[] bytes,
                                       Version version,
                                       ErrorCorrectionLevel ecLevel,
                                       IDictionary<DecodeHintType, object> hints)
  {
      var bits = new BitSource(bytes);
      var result = new StringBuilder(50);
      var byteSegments = new List<byte[]>(1);
      var symbolSequence = -1;
      var parityData = -1;

      try
      {
          bool fc1InEffect = false;
          Mode mode;
          do
          {
              // While still another segment to read...
              if (bits.available() < 4)
              {
                  // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
                  mode = Mode.TERMINATOR;
              }
              else
              {
                  try
                  {
                      mode = Mode.forBits(bits.readBits(4)); // mode is encoded by 4 bits
                  }
                  catch (ArgumentException)
                  {
                      return null;
                  }
              }

              if (mode != Mode.TERMINATOR)
              {
                  if (mode == Mode.FNC1_FIRST_POSITION || mode == Mode.FNC1_SECOND_POSITION)
                  {
                      // We do little with FNC1 except alter the parsed result a bit according to the spec
                      fc1InEffect = true;
                  }
                  else if (mode == Mode.STRUCTURED_APPEND)
                  {
                      if (bits.available() < 16)
                      {
                          return null;
                      }
                      // not really supported; but sequence number and parity is added later to the result metadata
                      // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
                      symbolSequence = bits.readBits(8);
                      parityData = bits.readBits(8);
                  }
                  else if (mode == Mode.ECI)
                  {
                      // Count doesn't apply to ECI. The designator value is not used to pick a
                      // character set here (byte segments rely on StringUtils.guessEncoding),
                      // but its bits must still be consumed; otherwise they would be misread
                      // as the next mode indicator and corrupt every following segment.
                      // A malformed or truncated designator raises ArgumentException,
                      // which is handled by the outer catch.
                      parseECIValue(bits);
                  }
                  else
                  {
                      // First handle Hanzi mode which does not start with character count
                      if (mode == Mode.HANZI)
                      {
                          // Chinese mode contains a subset indicator right after the mode indicator
                          int subset = bits.readBits(4);
                          int countHanzi = bits.readBits(mode.getCharacterCountBits(version));
                          if (subset == GB2312_SUBSET)
                          {
                              if (!decodeHanziSegment(bits, result, countHanzi))
                              {
                                  return null;
                              }
                          }
                      }
                      else
                      {
                          // "Normal" QR code modes:
                          // How many characters will follow, encoded in this mode?
                          int count = bits.readBits(mode.getCharacterCountBits(version));
                          if (mode == Mode.NUMERIC)
                          {
                              if (!decodeNumericSegment(bits, result, count))
                              {
                                  return null;
                              }
                          }
                          else if (mode == Mode.ALPHANUMERIC)
                          {
                              if (!decodeAlphanumericSegment(bits, result, count, fc1InEffect))
                              {
                                  return null;
                              }
                          }
                          else if (mode == Mode.BYTE)
                          {
                              if (!decodeByteSegment(bits, result, count, byteSegments, hints))
                              {
                                  return null;
                              }
                          }
                          else if (mode == Mode.KANJI)
                          {
                              if (!decodeKanjiSegment(bits, result, count))
                              {
                                  return null;
                              }
                          }
                          else
                          {
                              return null;
                          }
                      }
                  }
              }
          } while (mode != Mode.TERMINATOR);
      }
      catch (ArgumentException)
      {
          // from readBits() calls
          return null;
      }

      // Normalize line breaks in the decoded text to the platform convention.
  #if WindowsCE
      var resultString = result.ToString().Replace("\n", "\r\n");
  #else
      var resultString = result.ToString().Replace("\r\n", "\n").Replace("\n", Environment.NewLine);
  #endif
      return new DecoderResult(bytes,
                               resultString,
                               byteSegments.Count == 0 ? null : byteSegments,
                               ecLevel == null ? null : ecLevel.ToString(),
                               symbolSequence, parityData);
  }
  /// <summary>
  /// Decodes a Hanzi segment: <paramref name="count"/> characters of 13 bits each are
  /// unpacked into 2-byte GB2312 codes and appended to <paramref name="result"/> as text.
  /// See specification GBT 18284-2000.
  /// </summary>
  /// <param name="bits">The bit stream, positioned at the first character of the segment.</param>
  /// <param name="result">The builder that receives the decoded text.</param>
  /// <param name="count">The number of characters in the segment.</param>
  /// <returns>
  /// <c>true</c> on success; <c>false</c> if fewer than <c>count * 13</c> bits remain
  /// or the bytes cannot be converted to text.
  /// </returns>
  private static bool decodeHanziSegment(BitSource bits,
                                         StringBuilder result,
                                         int count)
  {
      // Don't crash trying to read more bits than we have available.
      if (count * 13 > bits.available())
      {
          return false;
      }

      // Each character will require 2 bytes. Read the characters as 2-byte pairs
      // and decode as GB2312 afterwards
      byte[] buffer = new byte[2 * count];
      int offset = 0;
      while (count > 0)
      {
          // Each 13 bits encodes a 2-byte character
          int twoBytes = bits.readBits(13);
          int assembledTwoBytes = ((twoBytes / 0x060) << 8) | (twoBytes % 0x060);
          // The high byte of the assembled value is the lead-byte offset. Offsets 0x00-0x09
          // belong to the first range, so the boundary on the ASSEMBLED value is 0x0A00.
          // (0x3BF is that same boundary expressed on the raw 13-bit value, 10 * 0x60 - 1;
          // applying it here mapped lead bytes 0xA5-0xAA into the wrong range.)
          if (assembledTwoBytes < 0x00A00)
          {
              // In the 0xA1A1 to 0xAAFE range
              assembledTwoBytes += 0x0A1A1;
          }
          else
          {
              // In the 0xB0A1 to 0xFAFE range
              assembledTwoBytes += 0x0A6A1;
          }
          buffer[offset] = (byte)((assembledTwoBytes >> 8) & 0xFF);
          buffer[offset + 1] = (byte)(assembledTwoBytes & 0xFF);
          offset += 2;
          count--;
      }

      // GB2312 may not be supported in some environments:
      try
      {
          result.Append(Encoding.GetEncoding(StringUtils.GB2312).GetString(buffer, 0, buffer.Length));
      }
  #if (WINDOWS_PHONE70 || WINDOWS_PHONE71 || SILVERLIGHT4 || SILVERLIGHT5 || NETFX_CORE || MONOANDROID || MONOTOUCH)
      catch (ArgumentException)
      {
          try
          {
              // Silverlight only supports a limited number of character sets, trying fallback to UTF-8
              result.Append(Encoding.GetEncoding("UTF-8").GetString(buffer, 0, buffer.Length));
          }
          catch (Exception)
          {
              return false;
          }
      }
  #endif
      catch (Exception)
      {
          return false;
      }

      return true;
  }
  /// <summary>
  /// Decodes a Kanji segment: <paramref name="count"/> characters of 13 bits each are
  /// unpacked into 2-byte Shift_JIS codes and appended to <paramref name="result"/> as text.
  /// </summary>
  /// <param name="bits">The bit stream, positioned at the first character of the segment.</param>
  /// <param name="result">The builder that receives the decoded text.</param>
  /// <param name="count">The number of characters in the segment.</param>
  /// <returns>
  /// <c>true</c> on success; <c>false</c> if fewer than <c>count * 13</c> bits remain
  /// or the bytes cannot be converted to text.
  /// </returns>
  private static bool decodeKanjiSegment(BitSource bits,
                                         StringBuilder result,
                                         int count)
  {
      // Refuse up front rather than running off the end of the bit stream.
      if (count * 13 > bits.available())
      {
          return false;
      }

      // Two bytes per character; the pairs are converted from Shift_JIS in one go below.
      var sjisBytes = new byte[2 * count];
      for (int pos = 0; pos < sjisBytes.Length; pos += 2)
      {
          // 13 bits pack one character as (high * 0xC0 + low).
          int packed = bits.readBits(13);
          int code = ((packed / 0x0C0) << 8) | (packed % 0x0C0);

          // Below 0x1F00 the character lies in 0x8140..0x9FFC, otherwise in 0xE040..0xEBBF.
          code += code < 0x01F00 ? 0x08140 : 0x0C140;

          sjisBytes[pos] = (byte)(code >> 8);
          sjisBytes[pos + 1] = (byte)code;
      }

      // Shift_JIS may not be supported in some environments:
      try
      {
          var sjis = Encoding.GetEncoding(StringUtils.SHIFT_JIS);
          result.Append(sjis.GetString(sjisBytes, 0, sjisBytes.Length));
      }
  #if (WINDOWS_PHONE70 || WINDOWS_PHONE71 || SILVERLIGHT4 || SILVERLIGHT5 || NETFX_CORE || MONOANDROID || MONOTOUCH)
      catch (ArgumentException)
      {
          try
          {
              // Silverlight only supports a limited number of character sets, trying fallback to UTF-8
              result.Append(Encoding.GetEncoding("UTF-8").GetString(sjisBytes, 0, sjisBytes.Length));
          }
          catch (Exception)
          {
              return false;
          }
      }
  #endif
      catch (Exception)
      {
          return false;
      }

      return true;
  }
  /// <summary>
  /// Decodes a byte-mode segment: reads <paramref name="count"/> bytes, converts them to text
  /// using the encoding guessed by <c>StringUtils.guessEncoding</c>, appends the text to
  /// <paramref name="result"/> and records the raw bytes in <paramref name="byteSegments"/>.
  /// </summary>
  /// <param name="bits">The bit stream, positioned at the first byte of the segment.</param>
  /// <param name="result">The builder that receives the decoded text.</param>
  /// <param name="count">The number of bytes in the segment.</param>
  /// <param name="byteSegments">The list that receives the raw bytes of a successfully decoded segment.</param>
  /// <param name="hints">Decoding hints, passed on to the encoding guess.</param>
  /// <returns>
  /// <c>true</c> on success; <c>false</c> if fewer than <c>count * 8</c> bits remain
  /// or the bytes cannot be converted to text.
  /// </returns>
  private static bool decodeByteSegment(BitSource bits,
                                        StringBuilder result,
                                        int count,
                                        IList<byte[]> byteSegments,
                                        IDictionary<DecodeHintType, object> hints)
  {
      // Eight bits per byte: refuse up front rather than running off the end of the stream.
      int bitsNeeded = count << 3;
      if (bitsNeeded > bits.available())
      {
          return false;
      }

      var readBytes = new byte[count];
      int index = 0;
      while (index < readBytes.Length)
      {
          readBytes[index] = (byte)bits.readBits(8);
          index++;
      }

      String charsetName = StringUtils.guessEncoding(readBytes, hints);

      try
      {
          var charset = Encoding.GetEncoding(charsetName);
          result.Append(charset.GetString(readBytes, 0, readBytes.Length));
      }
  #if (WINDOWS_PHONE70 || WINDOWS_PHONE71 || SILVERLIGHT4 || SILVERLIGHT5 || NETFX_CORE || MONOANDROID || MONOTOUCH)
      catch (ArgumentException)
      {
          try
          {
              // Silverlight only supports a limited number of character sets, trying fallback to UTF-8
              result.Append(Encoding.GetEncoding("UTF-8").GetString(readBytes, 0, readBytes.Length));
          }
          catch (Exception)
          {
              return false;
          }
      }
  #endif
  #if WindowsCE
      catch (PlatformNotSupportedException)
      {
          try
          {
              // WindowsCE doesn't support all encodings, and which ones depends on the device,
              // so a couple of alternatives are tried here.
              if (charsetName == "ISO-8859-1")
              {
                  result.Append(Encoding.GetEncoding(1252).GetString(readBytes, 0, readBytes.Length));
              }
              else
              {
                  result.Append(Encoding.GetEncoding("UTF-8").GetString(readBytes, 0, readBytes.Length));
              }
          }
          catch (Exception)
          {
              return false;
          }
      }
  #endif
      catch (Exception)
      {
          return false;
      }

      // Only segments that were converted successfully are recorded.
      byteSegments.Add(readBytes);
      return true;
  }
  /// <summary>
  /// Maps an alphanumeric-mode value to its character via <c>ALPHANUMERIC_CHARS</c>.
  /// </summary>
  /// <param name="value">The encoded value; must be a valid index into <c>ALPHANUMERIC_CHARS</c>.</param>
  /// <returns>The character assigned to <paramref name="value"/>.</returns>
  /// <exception cref="ArgumentException">
  /// <paramref name="value"/> is outside the table. Malformed input can produce this
  /// (an 11-bit pair may hold values up to 2047, giving a first index of 45).
  /// <c>decode</c> already catches <see cref="ArgumentException"/> and returns null, so a
  /// corrupt symbol is rejected instead of failing with an IndexOutOfRangeException.
  /// </exception>
  private static char toAlphaNumericChar(int value)
  {
      if (value < 0 || value >= ALPHANUMERIC_CHARS.Length)
      {
          throw new ArgumentException("Invalid alphanumeric value " + value);
      }
      return ALPHANUMERIC_CHARS[value];
  }
  /// <summary>
  /// Decodes an alphanumeric segment and appends its characters to <paramref name="result"/>.
  /// Characters are packed two per 11 bits, with a trailing single character in 6 bits.
  /// </summary>
  /// <param name="bits">The bit stream, positioned at the first character of the segment.</param>
  /// <param name="result">The builder that receives the decoded text.</param>
  /// <param name="count">The number of characters in the segment.</param>
  /// <param name="fc1InEffect">
  /// When true, the appended text is post-processed: "%%" becomes "%" and a single "%"
  /// becomes the separator character 0x1D.
  /// </param>
  /// <returns><c>true</c> on success; <c>false</c> if the bit stream ends before the segment does.</returns>
  private static bool decodeAlphanumericSegment(BitSource bits,
                                                StringBuilder result,
                                                int count,
                                                bool fc1InEffect)
  {
      // Remember where this segment begins so FNC1 post-processing only touches its own text.
      int segmentStart = result.Length;

      // Pairs of characters: 11 bits hold (first * 45 + second).
      for (; count > 1; count -= 2)
      {
          if (bits.available() < 11)
          {
              return false;
          }
          int pair = bits.readBits(11);
          result.Append(toAlphaNumericChar(pair / 45));
          result.Append(toAlphaNumericChar(pair % 45));
      }

      // An odd character count leaves one character, stored in 6 bits.
      if (count == 1)
      {
          if (bits.available() < 6)
          {
              return false;
          }
          result.Append(toAlphaNumericChar(bits.readBits(6)));
      }

      if (!fc1InEffect)
      {
          return true;
      }

      // See section 6.4.8.1, 6.4.8.2: in an FNC1 mode the text needs some massaging.
      for (int pos = segmentStart; pos < result.Length; pos++)
      {
          if (result[pos] != '%')
          {
              continue;
          }

          bool doubled = pos < result.Length - 1 && result[pos + 1] == '%';
          if (doubled)
          {
              // %% is rendered as %
              result.Remove(pos + 1, 1);
          }
          else
          {
              // In alpha mode, % should be converted to FNC1 separator 0x1D
              result[pos] = (char)0x1D;
          }
      }

      return true;
  }
  /// <summary>
  /// Decodes a numeric segment and appends its digits to <paramref name="result"/>.
  /// Digits are packed three per 10 bits; a remainder of two digits uses 7 bits and a
  /// remainder of one digit uses 4 bits.
  /// </summary>
  /// <param name="bits">The bit stream, positioned at the first digit group of the segment.</param>
  /// <param name="result">The builder that receives the decoded digits.</param>
  /// <param name="count">The number of digits in the segment.</param>
  /// <returns>
  /// <c>true</c> on success; <c>false</c> if the bit stream ends early or a group holds a
  /// value too large for its number of digits.
  /// </returns>
  private static bool decodeNumericSegment(BitSource bits,
                                           StringBuilder result,
                                           int count)
  {
      // Full groups: 10 bits encode three digits.
      for (; count >= 3; count -= 3)
      {
          if (bits.available() < 10)
          {
              return false;
          }
          int triple = bits.readBits(10);
          if (triple >= 1000)
          {
              return false;
          }
          result.Append(toAlphaNumericChar(triple / 100));
          result.Append(toAlphaNumericChar((triple / 10) % 10));
          result.Append(toAlphaNumericChar(triple % 10));
      }

      switch (count)
      {
          case 2:
          {
              // Two digits left over to read, encoded in 7 bits
              if (bits.available() < 7)
              {
                  return false;
              }
              int pair = bits.readBits(7);
              if (pair >= 100)
              {
                  return false;
              }
              result.Append(toAlphaNumericChar(pair / 10));
              result.Append(toAlphaNumericChar(pair % 10));
              break;
          }
          case 1:
          {
              // One digit left over to read, encoded in 4 bits
              if (bits.available() < 4)
              {
                  return false;
              }
              int single = bits.readBits(4);
              if (single >= 10)
              {
                  return false;
              }
              result.Append(toAlphaNumericChar(single));
              break;
          }
      }

      return true;
  }
  /// <summary>
  /// Reads a variable-length ECI value from the bit stream. The leading bits of the first
  /// byte select the length: <c>0</c> = one byte, <c>10</c> = two bytes, <c>110</c> = three bytes.
  /// </summary>
  /// <param name="bits">The bit stream, positioned at the first byte of the value.</param>
  /// <returns>The value assembled from the remaining bits of the one, two or three bytes read.</returns>
  /// <exception cref="ArgumentException">The first byte starts with none of the three prefixes.</exception>
  private static int parseECIValue(BitSource bits)
  {
      int lead = bits.readBits(8);

      if ((lead & 0x80) == 0)
      {
          // 0xxxxxxx: the value is the low 7 bits of this single byte
          return lead & 0x7F;
      }

      if ((lead & 0xC0) == 0x80)
      {
          // 10xxxxxx: 6 bits here plus one more byte
          int low = bits.readBits(8);
          return ((lead & 0x3F) << 8) | low;
      }

      if ((lead & 0xE0) == 0xC0)
      {
          // 110xxxxx: 5 bits here plus two more bytes
          int low = bits.readBits(16);
          return ((lead & 0x1F) << 16) | low;
      }

      throw new ArgumentException("Bad ECI bits starting with byte " + lead);
  }
  489. }
  490. }