You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

scannerc.go 88 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025
  1. //
  2. // Copyright (c) 2011-2019 Canonical Ltd
  3. // Copyright (c) 2006-2010 Kirill Simonov
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy of
  6. // this software and associated documentation files (the "Software"), to deal in
  7. // the Software without restriction, including without limitation the rights to
  8. // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  9. // of the Software, and to permit persons to whom the Software is furnished to do
  10. // so, subject to the following conditions:
  11. //
  12. // The above copyright notice and this permission notice shall be included in all
  13. // copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. // SOFTWARE.
  22. package yaml
  23. import (
  24. "bytes"
  25. "fmt"
  26. )
  27. // Introduction
  28. // ************
  29. //
  30. // The following notes assume that you are familiar with the YAML specification
  31. // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in
  32. // some cases we are less restrictive than it requires.
  33. //
  34. // The process of transforming a YAML stream into a sequence of events is
  35. // divided into two steps: Scanning and Parsing.
  36. //
  37. // The Scanner transforms the input stream into a sequence of tokens, while the
  38. // Parser transforms the sequence of tokens produced by the Scanner into a
  39. // sequence of parsing events.
  40. //
  41. // The Scanner is rather clever and complicated. The Parser, on the contrary,
  42. // is a straightforward implementation of a recursive-descent parser (or,
  43. // LL(1) parser, as it is usually called).
  44. //
  45. // Actually there are two issues of Scanning that might be called "clever", the
  46. // rest is quite straightforward. The issues are "block collection start" and
  47. // "simple keys". Both issues are explained below in detail.
  48. //
  49. // Here the Scanning step is explained and implemented. We start with the list
  50. // of all the tokens produced by the Scanner together with short descriptions.
  51. //
  52. // Now, tokens:
  53. //
  54. // STREAM-START(encoding) # The stream start.
  55. // STREAM-END # The stream end.
  56. // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
  57. // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
  58. // DOCUMENT-START # '---'
  59. // DOCUMENT-END # '...'
  60. // BLOCK-SEQUENCE-START # Indentation increase denoting a block
  61. // BLOCK-MAPPING-START # sequence or a block mapping.
  62. // BLOCK-END # Indentation decrease.
  63. // FLOW-SEQUENCE-START # '['
  64. // FLOW-SEQUENCE-END # ']'
  65. // FLOW-MAPPING-START # '{'
  66. // FLOW-MAPPING-END # '}'
  67. // BLOCK-ENTRY # '-'
  68. // FLOW-ENTRY # ','
  69. // KEY # '?' or nothing (simple keys).
  70. // VALUE # ':'
  71. // ALIAS(anchor) # '*anchor'
  72. // ANCHOR(anchor) # '&anchor'
  73. // TAG(handle,suffix) # '!handle!suffix'
  74. // SCALAR(value,style) # A scalar.
  75. //
  76. // The following two tokens are "virtual" tokens denoting the beginning and the
  77. // end of the stream:
  78. //
  79. // STREAM-START(encoding)
  80. // STREAM-END
  81. //
  82. // We pass the information about the input stream encoding with the
  83. // STREAM-START token.
  84. //
  85. // The next two tokens are responsible for directives:
  86. //
  87. // VERSION-DIRECTIVE(major,minor)
  88. // TAG-DIRECTIVE(handle,prefix)
  89. //
  90. // Example:
  91. //
  92. // %YAML 1.1
  93. // %TAG ! !foo
  94. // %TAG !yaml! tag:yaml.org,2002:
  95. // ---
  96. //
  97. // The corresponding sequence of tokens:
  98. //
  99. // STREAM-START(utf-8)
  100. // VERSION-DIRECTIVE(1,1)
  101. // TAG-DIRECTIVE("!","!foo")
  102. // TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
  103. // DOCUMENT-START
  104. // STREAM-END
  105. //
  106. // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
  107. // line.
  108. //
  109. // The document start and end indicators are represented by:
  110. //
  111. // DOCUMENT-START
  112. // DOCUMENT-END
  113. //
  114. // Note that if a YAML stream contains an implicit document (without '---'
  115. // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
  116. // produced.
  117. //
  118. // In the following examples, we present whole documents together with the
  119. // produced tokens.
  120. //
  121. // 1. An implicit document:
  122. //
  123. // 'a scalar'
  124. //
  125. // Tokens:
  126. //
  127. // STREAM-START(utf-8)
  128. // SCALAR("a scalar",single-quoted)
  129. // STREAM-END
  130. //
  131. // 2. An explicit document:
  132. //
  133. // ---
  134. // 'a scalar'
  135. // ...
  136. //
  137. // Tokens:
  138. //
  139. // STREAM-START(utf-8)
  140. // DOCUMENT-START
  141. // SCALAR("a scalar",single-quoted)
  142. // DOCUMENT-END
  143. // STREAM-END
  144. //
  145. // 3. Several documents in a stream:
  146. //
  147. // 'a scalar'
  148. // ---
  149. // 'another scalar'
  150. // ---
  151. // 'yet another scalar'
  152. //
  153. // Tokens:
  154. //
  155. // STREAM-START(utf-8)
  156. // SCALAR("a scalar",single-quoted)
  157. // DOCUMENT-START
  158. // SCALAR("another scalar",single-quoted)
  159. // DOCUMENT-START
  160. // SCALAR("yet another scalar",single-quoted)
  161. // STREAM-END
  162. //
  163. // We have already introduced the SCALAR token above. The following tokens are
  164. // used to describe aliases, anchors, tags, and scalars:
  165. //
  166. // ALIAS(anchor)
  167. // ANCHOR(anchor)
  168. // TAG(handle,suffix)
  169. // SCALAR(value,style)
  170. //
  171. // The following series of examples illustrate the usage of these tokens:
  172. //
  173. // 1. A recursive sequence:
  174. //
  175. // &A [ *A ]
  176. //
  177. // Tokens:
  178. //
  179. // STREAM-START(utf-8)
  180. // ANCHOR("A")
  181. // FLOW-SEQUENCE-START
  182. // ALIAS("A")
  183. // FLOW-SEQUENCE-END
  184. // STREAM-END
  185. //
  186. // 2. A tagged scalar:
  187. //
  188. // !!float "3.14" # A good approximation.
  189. //
  190. // Tokens:
  191. //
  192. // STREAM-START(utf-8)
  193. // TAG("!!","float")
  194. // SCALAR("3.14",double-quoted)
  195. // STREAM-END
  196. //
  197. // 3. Various scalar styles:
  198. //
  199. // --- # Implicit empty plain scalars do not produce tokens.
  200. // --- a plain scalar
  201. // --- 'a single-quoted scalar'
  202. // --- "a double-quoted scalar"
  203. // --- |-
  204. // a literal scalar
  205. // --- >-
  206. // a folded
  207. // scalar
  208. //
  209. // Tokens:
  210. //
  211. // STREAM-START(utf-8)
  212. // DOCUMENT-START
  213. // DOCUMENT-START
  214. // SCALAR("a plain scalar",plain)
  215. // DOCUMENT-START
  216. // SCALAR("a single-quoted scalar",single-quoted)
  217. // DOCUMENT-START
  218. // SCALAR("a double-quoted scalar",double-quoted)
  219. // DOCUMENT-START
  220. // SCALAR("a literal scalar",literal)
  221. // DOCUMENT-START
  222. // SCALAR("a folded scalar",folded)
  223. // STREAM-END
  224. //
  225. // Now it's time to review collection-related tokens. We will start with
  226. // flow collections:
  227. //
  228. // FLOW-SEQUENCE-START
  229. // FLOW-SEQUENCE-END
  230. // FLOW-MAPPING-START
  231. // FLOW-MAPPING-END
  232. // FLOW-ENTRY
  233. // KEY
  234. // VALUE
  235. //
  236. // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
  237. // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
  238. // correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
  239. // indicators '?' and ':', which are used for denoting mapping keys and values,
  240. // are represented by the KEY and VALUE tokens.
  241. //
  242. // The following examples show flow collections:
  243. //
  244. // 1. A flow sequence:
  245. //
  246. // [item 1, item 2, item 3]
  247. //
  248. // Tokens:
  249. //
  250. // STREAM-START(utf-8)
  251. // FLOW-SEQUENCE-START
  252. // SCALAR("item 1",plain)
  253. // FLOW-ENTRY
  254. // SCALAR("item 2",plain)
  255. // FLOW-ENTRY
  256. // SCALAR("item 3",plain)
  257. // FLOW-SEQUENCE-END
  258. // STREAM-END
  259. //
  260. // 2. A flow mapping:
  261. //
  262. // {
  263. // a simple key: a value, # Note that the KEY token is produced.
  264. // ? a complex key: another value,
  265. // }
  266. //
  267. // Tokens:
  268. //
  269. // STREAM-START(utf-8)
  270. // FLOW-MAPPING-START
  271. // KEY
  272. // SCALAR("a simple key",plain)
  273. // VALUE
  274. // SCALAR("a value",plain)
  275. // FLOW-ENTRY
  276. // KEY
  277. // SCALAR("a complex key",plain)
  278. // VALUE
  279. // SCALAR("another value",plain)
  280. // FLOW-ENTRY
  281. // FLOW-MAPPING-END
  282. // STREAM-END
  283. //
  284. // A simple key is a key which is not denoted by the '?' indicator. Note that
  285. // the Scanner still produces the KEY token whenever it encounters a simple key.
  286. //
  287. // For scanning block collections, the following tokens are used (note that we
  288. // repeat KEY and VALUE here):
  289. //
  290. // BLOCK-SEQUENCE-START
  291. // BLOCK-MAPPING-START
  292. // BLOCK-END
  293. // BLOCK-ENTRY
  294. // KEY
  295. // VALUE
  296. //
  297. // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
  298. // increase that precedes a block collection (cf. the INDENT token in Python).
  299. // The token BLOCK-END denotes indentation decrease that ends a block collection
  300. // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
  301. // that make detection of these tokens more complex.
  302. //
  303. // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
  304. // '-', '?', and ':' correspondingly.
  305. //
  306. // The following examples show how the tokens BLOCK-SEQUENCE-START,
  307. // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
  308. //
  309. // 1. Block sequences:
  310. //
  311. // - item 1
  312. // - item 2
  313. // -
  314. // - item 3.1
  315. // - item 3.2
  316. // -
  317. // key 1: value 1
  318. // key 2: value 2
  319. //
  320. // Tokens:
  321. //
  322. // STREAM-START(utf-8)
  323. // BLOCK-SEQUENCE-START
  324. // BLOCK-ENTRY
  325. // SCALAR("item 1",plain)
  326. // BLOCK-ENTRY
  327. // SCALAR("item 2",plain)
  328. // BLOCK-ENTRY
  329. // BLOCK-SEQUENCE-START
  330. // BLOCK-ENTRY
  331. // SCALAR("item 3.1",plain)
  332. // BLOCK-ENTRY
  333. // SCALAR("item 3.2",plain)
  334. // BLOCK-END
  335. // BLOCK-ENTRY
  336. // BLOCK-MAPPING-START
  337. // KEY
  338. // SCALAR("key 1",plain)
  339. // VALUE
  340. // SCALAR("value 1",plain)
  341. // KEY
  342. // SCALAR("key 2",plain)
  343. // VALUE
  344. // SCALAR("value 2",plain)
  345. // BLOCK-END
  346. // BLOCK-END
  347. // STREAM-END
  348. //
  349. // 2. Block mappings:
  350. //
  351. // a simple key: a value # The KEY token is produced here.
  352. // ? a complex key
  353. // : another value
  354. // a mapping:
  355. // key 1: value 1
  356. // key 2: value 2
  357. // a sequence:
  358. // - item 1
  359. // - item 2
  360. //
  361. // Tokens:
  362. //
  363. // STREAM-START(utf-8)
  364. // BLOCK-MAPPING-START
  365. // KEY
  366. // SCALAR("a simple key",plain)
  367. // VALUE
  368. // SCALAR("a value",plain)
  369. // KEY
  370. // SCALAR("a complex key",plain)
  371. // VALUE
  372. // SCALAR("another value",plain)
  373. // KEY
  374. // SCALAR("a mapping",plain)
  // VALUE
  375. // BLOCK-MAPPING-START
  376. // KEY
  377. // SCALAR("key 1",plain)
  378. // VALUE
  379. // SCALAR("value 1",plain)
  380. // KEY
  381. // SCALAR("key 2",plain)
  382. // VALUE
  383. // SCALAR("value 2",plain)
  384. // BLOCK-END
  385. // KEY
  386. // SCALAR("a sequence",plain)
  387. // VALUE
  388. // BLOCK-SEQUENCE-START
  389. // BLOCK-ENTRY
  390. // SCALAR("item 1",plain)
  391. // BLOCK-ENTRY
  392. // SCALAR("item 2",plain)
  393. // BLOCK-END
  394. // BLOCK-END
  395. // STREAM-END
  396. //
  397. // YAML does not always require a new block collection to start on a new
  398. // line. If the current line contains only '-', '?', and ':' indicators, a new
  399. // block collection may start at the current line. The following examples
  400. // illustrate this case:
  401. //
  402. // 1. Collections in a sequence:
  403. //
  404. // - - item 1
  405. // - item 2
  406. // - key 1: value 1
  407. // key 2: value 2
  408. // - ? complex key
  409. // : complex value
  410. //
  411. // Tokens:
  412. //
  413. // STREAM-START(utf-8)
  414. // BLOCK-SEQUENCE-START
  415. // BLOCK-ENTRY
  416. // BLOCK-SEQUENCE-START
  417. // BLOCK-ENTRY
  418. // SCALAR("item 1",plain)
  419. // BLOCK-ENTRY
  420. // SCALAR("item 2",plain)
  421. // BLOCK-END
  422. // BLOCK-ENTRY
  423. // BLOCK-MAPPING-START
  424. // KEY
  425. // SCALAR("key 1",plain)
  426. // VALUE
  427. // SCALAR("value 1",plain)
  428. // KEY
  429. // SCALAR("key 2",plain)
  430. // VALUE
  431. // SCALAR("value 2",plain)
  432. // BLOCK-END
  433. // BLOCK-ENTRY
  434. // BLOCK-MAPPING-START
  435. // KEY
  436. // SCALAR("complex key",plain)
  437. // VALUE
  438. // SCALAR("complex value",plain)
  439. // BLOCK-END
  440. // BLOCK-END
  441. // STREAM-END
  442. //
  443. // 2. Collections in a mapping:
  444. //
  445. // ? a sequence
  446. // : - item 1
  447. // - item 2
  448. // ? a mapping
  449. // : key 1: value 1
  450. // key 2: value 2
  451. //
  452. // Tokens:
  453. //
  454. // STREAM-START(utf-8)
  455. // BLOCK-MAPPING-START
  456. // KEY
  457. // SCALAR("a sequence",plain)
  458. // VALUE
  459. // BLOCK-SEQUENCE-START
  460. // BLOCK-ENTRY
  461. // SCALAR("item 1",plain)
  462. // BLOCK-ENTRY
  463. // SCALAR("item 2",plain)
  464. // BLOCK-END
  465. // KEY
  466. // SCALAR("a mapping",plain)
  467. // VALUE
  468. // BLOCK-MAPPING-START
  469. // KEY
  470. // SCALAR("key 1",plain)
  471. // VALUE
  472. // SCALAR("value 1",plain)
  473. // KEY
  474. // SCALAR("key 2",plain)
  475. // VALUE
  476. // SCALAR("value 2",plain)
  477. // BLOCK-END
  478. // BLOCK-END
  479. // STREAM-END
  480. //
  481. // YAML also permits non-indented sequences if they are included in a block
  482. // mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
  483. //
  484. // key:
  485. // - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
  486. // - item 2
  487. //
  488. // Tokens:
  489. //
  490. // STREAM-START(utf-8)
  491. // BLOCK-MAPPING-START
  492. // KEY
  493. // SCALAR("key",plain)
  494. // VALUE
  495. // BLOCK-ENTRY
  496. // SCALAR("item 1",plain)
  497. // BLOCK-ENTRY
  498. // SCALAR("item 2",plain)
  499. // BLOCK-END
  // STREAM-END
  500. //
  501. // Ensure that the buffer contains the required number of characters.
  502. // Return true on success, false on failure (reader error or memory error).
  503. func cache(parser *yaml_parser_t, length int) bool {
  504. // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B)
  505. return parser.unread >= length || yaml_parser_update_buffer(parser, length)
  506. }
  507. // Advance the buffer pointer.
  508. func skip(parser *yaml_parser_t) {
  509. if !is_blank(parser.buffer, parser.buffer_pos) {
  510. parser.newlines = 0
  511. }
  512. parser.mark.index++
  513. parser.mark.column++
  514. parser.unread--
  515. parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
  516. }
  517. func skip_line(parser *yaml_parser_t) {
  518. if is_crlf(parser.buffer, parser.buffer_pos) {
  519. parser.mark.index += 2
  520. parser.mark.column = 0
  521. parser.mark.line++
  522. parser.unread -= 2
  523. parser.buffer_pos += 2
  524. parser.newlines++
  525. } else if is_break(parser.buffer, parser.buffer_pos) {
  526. parser.mark.index++
  527. parser.mark.column = 0
  528. parser.mark.line++
  529. parser.unread--
  530. parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
  531. parser.newlines++
  532. }
  533. }
  534. // Copy a character to a string buffer and advance pointers.
  535. func read(parser *yaml_parser_t, s []byte) []byte {
  536. if !is_blank(parser.buffer, parser.buffer_pos) {
  537. parser.newlines = 0
  538. }
  539. w := width(parser.buffer[parser.buffer_pos])
  540. if w == 0 {
  541. panic("invalid character sequence")
  542. }
  543. if len(s) == 0 {
  544. s = make([]byte, 0, 32)
  545. }
  546. if w == 1 && len(s)+w <= cap(s) {
  547. s = s[:len(s)+1]
  548. s[len(s)-1] = parser.buffer[parser.buffer_pos]
  549. parser.buffer_pos++
  550. } else {
  551. s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...)
  552. parser.buffer_pos += w
  553. }
  554. parser.mark.index++
  555. parser.mark.column++
  556. parser.unread--
  557. return s
  558. }
  559. // Copy a line break character to a string buffer and advance pointers.
  560. func read_line(parser *yaml_parser_t, s []byte) []byte {
  561. buf := parser.buffer
  562. pos := parser.buffer_pos
  563. switch {
  564. case buf[pos] == '\r' && buf[pos+1] == '\n':
  565. // CR LF . LF
  566. s = append(s, '\n')
  567. parser.buffer_pos += 2
  568. parser.mark.index++
  569. parser.unread--
  570. case buf[pos] == '\r' || buf[pos] == '\n':
  571. // CR|LF . LF
  572. s = append(s, '\n')
  573. parser.buffer_pos += 1
  574. case buf[pos] == '\xC2' && buf[pos+1] == '\x85':
  575. // NEL . LF
  576. s = append(s, '\n')
  577. parser.buffer_pos += 2
  578. case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'):
  579. // LS|PS . LS|PS
  580. s = append(s, buf[parser.buffer_pos:pos+3]...)
  581. parser.buffer_pos += 3
  582. default:
  583. return s
  584. }
  585. parser.mark.index++
  586. parser.mark.column = 0
  587. parser.mark.line++
  588. parser.unread--
  589. parser.newlines++
  590. return s
  591. }
  592. // Get the next token.
  593. func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool {
  594. // Erase the token object.
  595. *token = yaml_token_t{} // [Go] Is this necessary?
  596. // No tokens after STREAM-END or error.
  597. if parser.stream_end_produced || parser.error != yaml_NO_ERROR {
  598. return true
  599. }
  600. // Ensure that the tokens queue contains enough tokens.
  601. if !parser.token_available {
  602. if !yaml_parser_fetch_more_tokens(parser) {
  603. return false
  604. }
  605. }
  606. // Fetch the next token from the queue.
  607. *token = parser.tokens[parser.tokens_head]
  608. parser.tokens_head++
  609. parser.tokens_parsed++
  610. parser.token_available = false
  611. if token.typ == yaml_STREAM_END_TOKEN {
  612. parser.stream_end_produced = true
  613. }
  614. return true
  615. }
  616. // Set the scanner error and return false.
  617. func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool {
  618. parser.error = yaml_SCANNER_ERROR
  619. parser.context = context
  620. parser.context_mark = context_mark
  621. parser.problem = problem
  622. parser.problem_mark = parser.mark
  623. return false
  624. }
  625. func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool {
  626. context := "while parsing a tag"
  627. if directive {
  628. context = "while parsing a %TAG directive"
  629. }
  630. return yaml_parser_set_scanner_error(parser, context, context_mark, problem)
  631. }
  // trace is a debugging aid. It immediately prints args prefixed with "+++"
  // and returns a function that prints the same args prefixed with "---",
  // suitable for use as `defer trace(...)()`.
  func trace(args ...interface{}) func() {
  	// prefixed builds a fresh argument list with marker in front of args.
  	prefixed := func(marker string) []interface{} {
  		line := make([]interface{}, 0, len(args)+1)
  		line = append(line, marker)
  		return append(line, args...)
  	}

  	fmt.Println(prefixed("+++")...)

  	// Built now, so the closure prints the arguments as they were on entry.
  	closing := prefixed("---")
  	return func() { fmt.Println(closing...) }
  }
  638. // Ensure that the tokens queue contains at least one token which can be
  639. // returned to the Parser.
  640. func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
  641. // While we need more tokens to fetch, do it.
  642. for {
  643. // [Go] The comment parsing logic requires a lookahead of two tokens
  644. // so that foot comments may be parsed in time of associating them
  645. // with the tokens that are parsed before them, and also for line
  646. // comments to be transformed into head comments in some edge cases.
  647. if parser.tokens_head < len(parser.tokens)-2 {
  648. // If a potential simple key is at the head position, we need to fetch
  649. // the next token to disambiguate it.
  650. head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
  651. if !ok {
  652. break
  653. } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
  654. return false
  655. } else if !valid {
  656. break
  657. }
  658. }
  659. // Fetch the next token.
  660. if !yaml_parser_fetch_next_token(parser) {
  661. return false
  662. }
  663. }
  664. parser.token_available = true
  665. return true
  666. }
  667. // The dispatcher for token fetchers.
  668. func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
  669. // Ensure that the buffer is initialized.
  670. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  671. return false
  672. }
  673. // Check if we just started scanning. Fetch STREAM-START then.
  674. if !parser.stream_start_produced {
  675. return yaml_parser_fetch_stream_start(parser)
  676. }
  677. scan_mark := parser.mark
  678. // Eat whitespaces and comments until we reach the next token.
  679. if !yaml_parser_scan_to_next_token(parser) {
  680. return false
  681. }
  682. // [Go] While unrolling indents, transform the head comments of prior
  683. // indentation levels observed after scan_start into foot comments at
  684. // the respective indexes.
  685. // Check the indentation level against the current column.
  686. if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) {
  687. return false
  688. }
  689. // Ensure that the buffer contains at least 4 characters. 4 is the length
  690. // of the longest indicators ('--- ' and '... ').
  691. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  692. return false
  693. }
  694. // Is it the end of the stream?
  695. if is_z(parser.buffer, parser.buffer_pos) {
  696. return yaml_parser_fetch_stream_end(parser)
  697. }
  698. // Is it a directive?
  699. if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' {
  700. return yaml_parser_fetch_directive(parser)
  701. }
  702. buf := parser.buffer
  703. pos := parser.buffer_pos
  704. // Is it the document start indicator?
  705. if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) {
  706. return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN)
  707. }
  708. // Is it the document end indicator?
  709. if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) {
  710. return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN)
  711. }
  712. comment_mark := parser.mark
  713. if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') {
  714. // Associate any following comments with the prior token.
  715. comment_mark = parser.tokens[len(parser.tokens)-1].start_mark
  716. }
  717. defer func() {
  718. if !ok {
  719. return
  720. }
  721. if !yaml_parser_scan_line_comment(parser, comment_mark) {
  722. ok = false
  723. return
  724. }
  725. }()
  726. // Is it the flow sequence start indicator?
  727. if buf[pos] == '[' {
  728. return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN)
  729. }
  730. // Is it the flow mapping start indicator?
  731. if parser.buffer[parser.buffer_pos] == '{' {
  732. return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN)
  733. }
  734. // Is it the flow sequence end indicator?
  735. if parser.buffer[parser.buffer_pos] == ']' {
  736. return yaml_parser_fetch_flow_collection_end(parser,
  737. yaml_FLOW_SEQUENCE_END_TOKEN)
  738. }
  739. // Is it the flow mapping end indicator?
  740. if parser.buffer[parser.buffer_pos] == '}' {
  741. return yaml_parser_fetch_flow_collection_end(parser,
  742. yaml_FLOW_MAPPING_END_TOKEN)
  743. }
  744. // Is it the flow entry indicator?
  745. if parser.buffer[parser.buffer_pos] == ',' {
  746. return yaml_parser_fetch_flow_entry(parser)
  747. }
  748. // Is it the block entry indicator?
  749. if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) {
  750. return yaml_parser_fetch_block_entry(parser)
  751. }
  752. // Is it the key indicator?
  753. if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
  754. return yaml_parser_fetch_key(parser)
  755. }
  756. // Is it the value indicator?
  757. if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
  758. return yaml_parser_fetch_value(parser)
  759. }
  760. // Is it an alias?
  761. if parser.buffer[parser.buffer_pos] == '*' {
  762. return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN)
  763. }
  764. // Is it an anchor?
  765. if parser.buffer[parser.buffer_pos] == '&' {
  766. return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN)
  767. }
  768. // Is it a tag?
  769. if parser.buffer[parser.buffer_pos] == '!' {
  770. return yaml_parser_fetch_tag(parser)
  771. }
  772. // Is it a literal scalar?
  773. if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 {
  774. return yaml_parser_fetch_block_scalar(parser, true)
  775. }
  776. // Is it a folded scalar?
  777. if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 {
  778. return yaml_parser_fetch_block_scalar(parser, false)
  779. }
  780. // Is it a single-quoted scalar?
  781. if parser.buffer[parser.buffer_pos] == '\'' {
  782. return yaml_parser_fetch_flow_scalar(parser, true)
  783. }
  784. // Is it a double-quoted scalar?
  785. if parser.buffer[parser.buffer_pos] == '"' {
  786. return yaml_parser_fetch_flow_scalar(parser, false)
  787. }
  788. // Is it a plain scalar?
  789. //
  790. // A plain scalar may start with any non-blank characters except
  791. //
  792. // '-', '?', ':', ',', '[', ']', '{', '}',
  793. // '#', '&', '*', '!', '|', '>', '\'', '\"',
  794. // '%', '@', '`'.
  795. //
  796. // In the block context (and, for the '-' indicator, in the flow context
  797. // too), it may also start with the characters
  798. //
  799. // '-', '?', ':'
  800. //
  801. // if it is followed by a non-space character.
  802. //
  803. // The last rule is more restrictive than the specification requires.
  804. // [Go] TODO Make this logic more reasonable.
  805. //switch parser.buffer[parser.buffer_pos] {
  806. //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`':
  807. //}
  808. if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' ||
  809. parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' ||
  810. parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' ||
  811. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
  812. parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' ||
  813. parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' ||
  814. parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' ||
  815. parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' ||
  816. parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' ||
  817. parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') ||
  818. (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) ||
  819. (parser.flow_level == 0 &&
  820. (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') &&
  821. !is_blankz(parser.buffer, parser.buffer_pos+1)) {
  822. return yaml_parser_fetch_plain_scalar(parser)
  823. }
  824. // If we don't determine the token type so far, it is an error.
  825. return yaml_parser_set_scanner_error(parser,
  826. "while scanning for the next token", parser.mark,
  827. "found character that cannot start any token")
  828. }
  829. func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) {
  830. if !simple_key.possible {
  831. return false, true
  832. }
  833. // The 1.2 specification says:
  834. //
  835. // "If the ? indicator is omitted, parsing needs to see past the
  836. // implicit key to recognize it as such. To limit the amount of
  837. // lookahead required, the “:” indicator must appear at most 1024
  838. // Unicode characters beyond the start of the key. In addition, the key
  839. // is restricted to a single line."
  840. //
  841. if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
  842. // Check if the potential simple key to be removed is required.
  843. if simple_key.required {
  844. return false, yaml_parser_set_scanner_error(parser,
  845. "while scanning a simple key", simple_key.mark,
  846. "could not find expected ':'")
  847. }
  848. simple_key.possible = false
  849. return false, true
  850. }
  851. return true, true
  852. }
  853. // Check if a simple key may start at the current position and add it if
  854. // needed.
  855. func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
  856. // A simple key is required at the current position if the scanner is in
  857. // the block context and the current column coincides with the indentation
  858. // level.
  859. required := parser.flow_level == 0 && parser.indent == parser.mark.column
  860. //
  861. // If the current position may start a simple key, save it.
  862. //
  863. if parser.simple_key_allowed {
  864. simple_key := yaml_simple_key_t{
  865. possible: true,
  866. required: required,
  867. token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
  868. mark: parser.mark,
  869. }
  870. if !yaml_parser_remove_simple_key(parser) {
  871. return false
  872. }
  873. parser.simple_keys[len(parser.simple_keys)-1] = simple_key
  874. parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1
  875. }
  876. return true
  877. }
  878. // Remove a potential simple key at the current flow level.
  879. func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
  880. i := len(parser.simple_keys) - 1
  881. if parser.simple_keys[i].possible {
  882. // If the key is required, it is an error.
  883. if parser.simple_keys[i].required {
  884. return yaml_parser_set_scanner_error(parser,
  885. "while scanning a simple key", parser.simple_keys[i].mark,
  886. "could not find expected ':'")
  887. }
  888. // Remove the key from the stack.
  889. parser.simple_keys[i].possible = false
  890. delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)
  891. }
  892. return true
  893. }
  894. // max_flow_level limits the flow_level
  895. const max_flow_level = 10000
  896. // Increase the flow level and resize the simple key list if needed.
  897. func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
  898. // Reset the simple key on the next level.
  899. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
  900. possible: false,
  901. required: false,
  902. token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
  903. mark: parser.mark,
  904. })
  905. // Increase the flow level.
  906. parser.flow_level++
  907. if parser.flow_level > max_flow_level {
  908. return yaml_parser_set_scanner_error(parser,
  909. "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,
  910. fmt.Sprintf("exceeded max depth of %d", max_flow_level))
  911. }
  912. return true
  913. }
  914. // Decrease the flow level.
  915. func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
  916. if parser.flow_level > 0 {
  917. parser.flow_level--
  918. last := len(parser.simple_keys) - 1
  919. delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)
  920. parser.simple_keys = parser.simple_keys[:last]
  921. }
  922. return true
  923. }
  924. // max_indents limits the indents stack size
  925. const max_indents = 10000
  926. // Push the current indentation level to the stack and set the new level
  927. // the current column is greater than the indentation level. In this case,
  928. // append or insert the specified token into the token queue.
  929. func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool {
  930. // In the flow context, do nothing.
  931. if parser.flow_level > 0 {
  932. return true
  933. }
  934. if parser.indent < column {
  935. // Push the current indentation level to the stack and set the new
  936. // indentation level.
  937. parser.indents = append(parser.indents, parser.indent)
  938. parser.indent = column
  939. if len(parser.indents) > max_indents {
  940. return yaml_parser_set_scanner_error(parser,
  941. "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
  942. fmt.Sprintf("exceeded max depth of %d", max_indents))
  943. }
  944. // Create a token and insert it into the queue.
  945. token := yaml_token_t{
  946. typ: typ,
  947. start_mark: mark,
  948. end_mark: mark,
  949. }
  950. if number > -1 {
  951. number -= parser.tokens_parsed
  952. }
  953. yaml_insert_token(parser, number, &token)
  954. }
  955. return true
  956. }
  957. // Pop indentation levels from the indents stack until the current level
  958. // becomes less or equal to the column. For each indentation level, append
  959. // the BLOCK-END token.
  960. func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool {
  961. // In the flow context, do nothing.
  962. if parser.flow_level > 0 {
  963. return true
  964. }
  965. block_mark := scan_mark
  966. block_mark.index--
  967. // Loop through the indentation levels in the stack.
  968. for parser.indent > column {
  969. // [Go] Reposition the end token before potential following
  970. // foot comments of parent blocks. For that, search
  971. // backwards for recent comments that were at the same
  972. // indent as the block that is ending now.
  973. stop_index := block_mark.index
  974. for i := len(parser.comments) - 1; i >= 0; i-- {
  975. comment := &parser.comments[i]
  976. if comment.end_mark.index < stop_index {
  977. // Don't go back beyond the start of the comment/whitespace scan, unless column < 0.
  978. // If requested indent column is < 0, then the document is over and everything else
  979. // is a foot anyway.
  980. break
  981. }
  982. if comment.start_mark.column == parser.indent+1 {
  983. // This is a good match. But maybe there's a former comment
  984. // at that same indent level, so keep searching.
  985. block_mark = comment.start_mark
  986. }
  987. // While the end of the former comment matches with
  988. // the start of the following one, we know there's
  989. // nothing in between and scanning is still safe.
  990. stop_index = comment.scan_mark.index
  991. }
  992. // Create a token and append it to the queue.
  993. token := yaml_token_t{
  994. typ: yaml_BLOCK_END_TOKEN,
  995. start_mark: block_mark,
  996. end_mark: block_mark,
  997. }
  998. yaml_insert_token(parser, -1, &token)
  999. // Pop the indentation level.
  1000. parser.indent = parser.indents[len(parser.indents)-1]
  1001. parser.indents = parser.indents[:len(parser.indents)-1]
  1002. }
  1003. return true
  1004. }
  1005. // Initialize the scanner and produce the STREAM-START token.
  1006. func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
  1007. // Set the initial indentation.
  1008. parser.indent = -1
  1009. // Initialize the simple key stack.
  1010. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
  1011. parser.simple_keys_by_tok = make(map[int]int)
  1012. // A simple key is allowed at the beginning of the stream.
  1013. parser.simple_key_allowed = true
  1014. // We have started.
  1015. parser.stream_start_produced = true
  1016. // Create the STREAM-START token and append it to the queue.
  1017. token := yaml_token_t{
  1018. typ: yaml_STREAM_START_TOKEN,
  1019. start_mark: parser.mark,
  1020. end_mark: parser.mark,
  1021. encoding: parser.encoding,
  1022. }
  1023. yaml_insert_token(parser, -1, &token)
  1024. return true
  1025. }
  1026. // Produce the STREAM-END token and shut down the scanner.
  1027. func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {
  1028. // Force new line.
  1029. if parser.mark.column != 0 {
  1030. parser.mark.column = 0
  1031. parser.mark.line++
  1032. }
  1033. // Reset the indentation level.
  1034. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1035. return false
  1036. }
  1037. // Reset simple keys.
  1038. if !yaml_parser_remove_simple_key(parser) {
  1039. return false
  1040. }
  1041. parser.simple_key_allowed = false
  1042. // Create the STREAM-END token and append it to the queue.
  1043. token := yaml_token_t{
  1044. typ: yaml_STREAM_END_TOKEN,
  1045. start_mark: parser.mark,
  1046. end_mark: parser.mark,
  1047. }
  1048. yaml_insert_token(parser, -1, &token)
  1049. return true
  1050. }
  1051. // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
  1052. func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
  1053. // Reset the indentation level.
  1054. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1055. return false
  1056. }
  1057. // Reset simple keys.
  1058. if !yaml_parser_remove_simple_key(parser) {
  1059. return false
  1060. }
  1061. parser.simple_key_allowed = false
  1062. // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token.
  1063. token := yaml_token_t{}
  1064. if !yaml_parser_scan_directive(parser, &token) {
  1065. return false
  1066. }
  1067. // Append the token to the queue.
  1068. yaml_insert_token(parser, -1, &token)
  1069. return true
  1070. }
  1071. // Produce the DOCUMENT-START or DOCUMENT-END token.
  1072. func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1073. // Reset the indentation level.
  1074. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1075. return false
  1076. }
  1077. // Reset simple keys.
  1078. if !yaml_parser_remove_simple_key(parser) {
  1079. return false
  1080. }
  1081. parser.simple_key_allowed = false
  1082. // Consume the token.
  1083. start_mark := parser.mark
  1084. skip(parser)
  1085. skip(parser)
  1086. skip(parser)
  1087. end_mark := parser.mark
  1088. // Create the DOCUMENT-START or DOCUMENT-END token.
  1089. token := yaml_token_t{
  1090. typ: typ,
  1091. start_mark: start_mark,
  1092. end_mark: end_mark,
  1093. }
  1094. // Append the token to the queue.
  1095. yaml_insert_token(parser, -1, &token)
  1096. return true
  1097. }
  1098. // Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
  1099. func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1100. // The indicators '[' and '{' may start a simple key.
  1101. if !yaml_parser_save_simple_key(parser) {
  1102. return false
  1103. }
  1104. // Increase the flow level.
  1105. if !yaml_parser_increase_flow_level(parser) {
  1106. return false
  1107. }
  1108. // A simple key may follow the indicators '[' and '{'.
  1109. parser.simple_key_allowed = true
  1110. // Consume the token.
  1111. start_mark := parser.mark
  1112. skip(parser)
  1113. end_mark := parser.mark
  1114. // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token.
  1115. token := yaml_token_t{
  1116. typ: typ,
  1117. start_mark: start_mark,
  1118. end_mark: end_mark,
  1119. }
  1120. // Append the token to the queue.
  1121. yaml_insert_token(parser, -1, &token)
  1122. return true
  1123. }
  1124. // Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
  1125. func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1126. // Reset any potential simple key on the current flow level.
  1127. if !yaml_parser_remove_simple_key(parser) {
  1128. return false
  1129. }
  1130. // Decrease the flow level.
  1131. if !yaml_parser_decrease_flow_level(parser) {
  1132. return false
  1133. }
  1134. // No simple keys after the indicators ']' and '}'.
  1135. parser.simple_key_allowed = false
  1136. // Consume the token.
  1137. start_mark := parser.mark
  1138. skip(parser)
  1139. end_mark := parser.mark
  1140. // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token.
  1141. token := yaml_token_t{
  1142. typ: typ,
  1143. start_mark: start_mark,
  1144. end_mark: end_mark,
  1145. }
  1146. // Append the token to the queue.
  1147. yaml_insert_token(parser, -1, &token)
  1148. return true
  1149. }
  1150. // Produce the FLOW-ENTRY token.
  1151. func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {
  1152. // Reset any potential simple keys on the current flow level.
  1153. if !yaml_parser_remove_simple_key(parser) {
  1154. return false
  1155. }
  1156. // Simple keys are allowed after ','.
  1157. parser.simple_key_allowed = true
  1158. // Consume the token.
  1159. start_mark := parser.mark
  1160. skip(parser)
  1161. end_mark := parser.mark
  1162. // Create the FLOW-ENTRY token and append it to the queue.
  1163. token := yaml_token_t{
  1164. typ: yaml_FLOW_ENTRY_TOKEN,
  1165. start_mark: start_mark,
  1166. end_mark: end_mark,
  1167. }
  1168. yaml_insert_token(parser, -1, &token)
  1169. return true
  1170. }
  1171. // Produce the BLOCK-ENTRY token.
  1172. func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool {
  1173. // Check if the scanner is in the block context.
  1174. if parser.flow_level == 0 {
  1175. // Check if we are allowed to start a new entry.
  1176. if !parser.simple_key_allowed {
  1177. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1178. "block sequence entries are not allowed in this context")
  1179. }
  1180. // Add the BLOCK-SEQUENCE-START token if needed.
  1181. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) {
  1182. return false
  1183. }
  1184. } else {
  1185. // It is an error for the '-' indicator to occur in the flow context,
  1186. // but we let the Parser detect and report about it because the Parser
  1187. // is able to point to the context.
  1188. }
  1189. // Reset any potential simple keys on the current flow level.
  1190. if !yaml_parser_remove_simple_key(parser) {
  1191. return false
  1192. }
  1193. // Simple keys are allowed after '-'.
  1194. parser.simple_key_allowed = true
  1195. // Consume the token.
  1196. start_mark := parser.mark
  1197. skip(parser)
  1198. end_mark := parser.mark
  1199. // Create the BLOCK-ENTRY token and append it to the queue.
  1200. token := yaml_token_t{
  1201. typ: yaml_BLOCK_ENTRY_TOKEN,
  1202. start_mark: start_mark,
  1203. end_mark: end_mark,
  1204. }
  1205. yaml_insert_token(parser, -1, &token)
  1206. return true
  1207. }
  1208. // Produce the KEY token.
  1209. func yaml_parser_fetch_key(parser *yaml_parser_t) bool {
  1210. // In the block context, additional checks are required.
  1211. if parser.flow_level == 0 {
  1212. // Check if we are allowed to start a new key (not nessesary simple).
  1213. if !parser.simple_key_allowed {
  1214. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1215. "mapping keys are not allowed in this context")
  1216. }
  1217. // Add the BLOCK-MAPPING-START token if needed.
  1218. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
  1219. return false
  1220. }
  1221. }
  1222. // Reset any potential simple keys on the current flow level.
  1223. if !yaml_parser_remove_simple_key(parser) {
  1224. return false
  1225. }
  1226. // Simple keys are allowed after '?' in the block context.
  1227. parser.simple_key_allowed = parser.flow_level == 0
  1228. // Consume the token.
  1229. start_mark := parser.mark
  1230. skip(parser)
  1231. end_mark := parser.mark
  1232. // Create the KEY token and append it to the queue.
  1233. token := yaml_token_t{
  1234. typ: yaml_KEY_TOKEN,
  1235. start_mark: start_mark,
  1236. end_mark: end_mark,
  1237. }
  1238. yaml_insert_token(parser, -1, &token)
  1239. return true
  1240. }
  1241. // Produce the VALUE token.
  1242. func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
  1243. simple_key := &parser.simple_keys[len(parser.simple_keys)-1]
  1244. // Have we found a simple key?
  1245. if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
  1246. return false
  1247. } else if valid {
  1248. // Create the KEY token and insert it into the queue.
  1249. token := yaml_token_t{
  1250. typ: yaml_KEY_TOKEN,
  1251. start_mark: simple_key.mark,
  1252. end_mark: simple_key.mark,
  1253. }
  1254. yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token)
  1255. // In the block context, we may need to add the BLOCK-MAPPING-START token.
  1256. if !yaml_parser_roll_indent(parser, simple_key.mark.column,
  1257. simple_key.token_number,
  1258. yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) {
  1259. return false
  1260. }
  1261. // Remove the simple key.
  1262. simple_key.possible = false
  1263. delete(parser.simple_keys_by_tok, simple_key.token_number)
  1264. // A simple key cannot follow another simple key.
  1265. parser.simple_key_allowed = false
  1266. } else {
  1267. // The ':' indicator follows a complex key.
  1268. // In the block context, extra checks are required.
  1269. if parser.flow_level == 0 {
  1270. // Check if we are allowed to start a complex value.
  1271. if !parser.simple_key_allowed {
  1272. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1273. "mapping values are not allowed in this context")
  1274. }
  1275. // Add the BLOCK-MAPPING-START token if needed.
  1276. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
  1277. return false
  1278. }
  1279. }
  1280. // Simple keys after ':' are allowed in the block context.
  1281. parser.simple_key_allowed = parser.flow_level == 0
  1282. }
  1283. // Consume the token.
  1284. start_mark := parser.mark
  1285. skip(parser)
  1286. end_mark := parser.mark
  1287. // Create the VALUE token and append it to the queue.
  1288. token := yaml_token_t{
  1289. typ: yaml_VALUE_TOKEN,
  1290. start_mark: start_mark,
  1291. end_mark: end_mark,
  1292. }
  1293. yaml_insert_token(parser, -1, &token)
  1294. return true
  1295. }
  1296. // Produce the ALIAS or ANCHOR token.
  1297. func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1298. // An anchor or an alias could be a simple key.
  1299. if !yaml_parser_save_simple_key(parser) {
  1300. return false
  1301. }
  1302. // A simple key cannot follow an anchor or an alias.
  1303. parser.simple_key_allowed = false
  1304. // Create the ALIAS or ANCHOR token and append it to the queue.
  1305. var token yaml_token_t
  1306. if !yaml_parser_scan_anchor(parser, &token, typ) {
  1307. return false
  1308. }
  1309. yaml_insert_token(parser, -1, &token)
  1310. return true
  1311. }
  1312. // Produce the TAG token.
  1313. func yaml_parser_fetch_tag(parser *yaml_parser_t) bool {
  1314. // A tag could be a simple key.
  1315. if !yaml_parser_save_simple_key(parser) {
  1316. return false
  1317. }
  1318. // A simple key cannot follow a tag.
  1319. parser.simple_key_allowed = false
  1320. // Create the TAG token and append it to the queue.
  1321. var token yaml_token_t
  1322. if !yaml_parser_scan_tag(parser, &token) {
  1323. return false
  1324. }
  1325. yaml_insert_token(parser, -1, &token)
  1326. return true
  1327. }
  1328. // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
  1329. func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool {
  1330. // Remove any potential simple keys.
  1331. if !yaml_parser_remove_simple_key(parser) {
  1332. return false
  1333. }
  1334. // A simple key may follow a block scalar.
  1335. parser.simple_key_allowed = true
  1336. // Create the SCALAR token and append it to the queue.
  1337. var token yaml_token_t
  1338. if !yaml_parser_scan_block_scalar(parser, &token, literal) {
  1339. return false
  1340. }
  1341. yaml_insert_token(parser, -1, &token)
  1342. return true
  1343. }
  1344. // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
  1345. func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool {
  1346. // A plain scalar could be a simple key.
  1347. if !yaml_parser_save_simple_key(parser) {
  1348. return false
  1349. }
  1350. // A simple key cannot follow a flow scalar.
  1351. parser.simple_key_allowed = false
  1352. // Create the SCALAR token and append it to the queue.
  1353. var token yaml_token_t
  1354. if !yaml_parser_scan_flow_scalar(parser, &token, single) {
  1355. return false
  1356. }
  1357. yaml_insert_token(parser, -1, &token)
  1358. return true
  1359. }
  1360. // Produce the SCALAR(...,plain) token.
  1361. func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool {
  1362. // A plain scalar could be a simple key.
  1363. if !yaml_parser_save_simple_key(parser) {
  1364. return false
  1365. }
  1366. // A simple key cannot follow a flow scalar.
  1367. parser.simple_key_allowed = false
  1368. // Create the SCALAR token and append it to the queue.
  1369. var token yaml_token_t
  1370. if !yaml_parser_scan_plain_scalar(parser, &token) {
  1371. return false
  1372. }
  1373. yaml_insert_token(parser, -1, &token)
  1374. return true
  1375. }
  1376. // Eat whitespaces and comments until the next token is found.
  1377. func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {
  1378. scan_mark := parser.mark
  1379. // Until the next token is not found.
  1380. for {
  1381. // Allow the BOM mark to start a line.
  1382. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1383. return false
  1384. }
  1385. if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) {
  1386. skip(parser)
  1387. }
  1388. // Eat whitespaces.
  1389. // Tabs are allowed:
  1390. // - in the flow context
  1391. // - in the block context, but not at the beginning of the line or
  1392. // after '-', '?', or ':' (complex value).
  1393. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1394. return false
  1395. }
  1396. for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') {
  1397. skip(parser)
  1398. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1399. return false
  1400. }
  1401. }
  1402. // Check if we just had a line comment under a sequence entry that
  1403. // looks more like a header to the following content. Similar to this:
  1404. //
  1405. // - # The comment
  1406. // - Some data
  1407. //
  1408. // If so, transform the line comment to a head comment and reposition.
  1409. if len(parser.comments) > 0 && len(parser.tokens) > 1 {
  1410. tokenA := parser.tokens[len(parser.tokens)-2]
  1411. tokenB := parser.tokens[len(parser.tokens)-1]
  1412. comment := &parser.comments[len(parser.comments)-1]
  1413. if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) {
  1414. // If it was in the prior line, reposition so it becomes a
  1415. // header of the follow up token. Otherwise, keep it in place
  1416. // so it becomes a header of the former.
  1417. comment.head = comment.line
  1418. comment.line = nil
  1419. if comment.start_mark.line == parser.mark.line-1 {
  1420. comment.token_mark = parser.mark
  1421. }
  1422. }
  1423. }
  1424. // Eat a comment until a line break.
  1425. if parser.buffer[parser.buffer_pos] == '#' {
  1426. if !yaml_parser_scan_comments(parser, scan_mark) {
  1427. return false
  1428. }
  1429. }
  1430. // If it is a line break, eat it.
  1431. if is_break(parser.buffer, parser.buffer_pos) {
  1432. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1433. return false
  1434. }
  1435. skip_line(parser)
  1436. // In the block context, a new line may start a simple key.
  1437. if parser.flow_level == 0 {
  1438. parser.simple_key_allowed = true
  1439. }
  1440. } else {
  1441. break // We have found a token.
  1442. }
  1443. }
  1444. return true
  1445. }
  1446. // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
  1447. //
  1448. // Scope:
  1449. // %YAML 1.1 # a comment \n
  1450. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1451. // %TAG !yaml! tag:yaml.org,2002: \n
  1452. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1453. //
  1454. func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool {
  1455. // Eat '%'.
  1456. start_mark := parser.mark
  1457. skip(parser)
  1458. // Scan the directive name.
  1459. var name []byte
  1460. if !yaml_parser_scan_directive_name(parser, start_mark, &name) {
  1461. return false
  1462. }
  1463. // Is it a YAML directive?
  1464. if bytes.Equal(name, []byte("YAML")) {
  1465. // Scan the VERSION directive value.
  1466. var major, minor int8
  1467. if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) {
  1468. return false
  1469. }
  1470. end_mark := parser.mark
  1471. // Create a VERSION-DIRECTIVE token.
  1472. *token = yaml_token_t{
  1473. typ: yaml_VERSION_DIRECTIVE_TOKEN,
  1474. start_mark: start_mark,
  1475. end_mark: end_mark,
  1476. major: major,
  1477. minor: minor,
  1478. }
  1479. // Is it a TAG directive?
  1480. } else if bytes.Equal(name, []byte("TAG")) {
  1481. // Scan the TAG directive value.
  1482. var handle, prefix []byte
  1483. if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) {
  1484. return false
  1485. }
  1486. end_mark := parser.mark
  1487. // Create a TAG-DIRECTIVE token.
  1488. *token = yaml_token_t{
  1489. typ: yaml_TAG_DIRECTIVE_TOKEN,
  1490. start_mark: start_mark,
  1491. end_mark: end_mark,
  1492. value: handle,
  1493. prefix: prefix,
  1494. }
  1495. // Unknown directive.
  1496. } else {
  1497. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1498. start_mark, "found unknown directive name")
  1499. return false
  1500. }
  1501. // Eat the rest of the line including any comments.
  1502. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1503. return false
  1504. }
  1505. for is_blank(parser.buffer, parser.buffer_pos) {
  1506. skip(parser)
  1507. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1508. return false
  1509. }
  1510. }
  1511. if parser.buffer[parser.buffer_pos] == '#' {
  1512. // [Go] Discard this inline comment for the time being.
  1513. //if !yaml_parser_scan_line_comment(parser, start_mark) {
  1514. // return false
  1515. //}
  1516. for !is_breakz(parser.buffer, parser.buffer_pos) {
  1517. skip(parser)
  1518. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1519. return false
  1520. }
  1521. }
  1522. }
  1523. // Check if we are at the end of the line.
  1524. if !is_breakz(parser.buffer, parser.buffer_pos) {
  1525. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1526. start_mark, "did not find expected comment or line break")
  1527. return false
  1528. }
  1529. // Eat a line break.
  1530. if is_break(parser.buffer, parser.buffer_pos) {
  1531. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1532. return false
  1533. }
  1534. skip_line(parser)
  1535. }
  1536. return true
  1537. }
  1538. // Scan the directive name.
  1539. //
  1540. // Scope:
  1541. // %YAML 1.1 # a comment \n
  1542. // ^^^^
  1543. // %TAG !yaml! tag:yaml.org,2002: \n
  1544. // ^^^
  1545. //
  1546. func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool {
  1547. // Consume the directive name.
  1548. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1549. return false
  1550. }
  1551. var s []byte
  1552. for is_alpha(parser.buffer, parser.buffer_pos) {
  1553. s = read(parser, s)
  1554. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1555. return false
  1556. }
  1557. }
  1558. // Check if the name is empty.
  1559. if len(s) == 0 {
  1560. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1561. start_mark, "could not find expected directive name")
  1562. return false
  1563. }
  1564. // Check for an blank character after the name.
  1565. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1566. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1567. start_mark, "found unexpected non-alphabetical character")
  1568. return false
  1569. }
  1570. *name = s
  1571. return true
  1572. }
  1573. // Scan the value of VERSION-DIRECTIVE.
  1574. //
  1575. // Scope:
  1576. // %YAML 1.1 # a comment \n
  1577. // ^^^^^^
  1578. func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool {
  1579. // Eat whitespaces.
  1580. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1581. return false
  1582. }
  1583. for is_blank(parser.buffer, parser.buffer_pos) {
  1584. skip(parser)
  1585. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1586. return false
  1587. }
  1588. }
  1589. // Consume the major version number.
  1590. if !yaml_parser_scan_version_directive_number(parser, start_mark, major) {
  1591. return false
  1592. }
  1593. // Eat '.'.
  1594. if parser.buffer[parser.buffer_pos] != '.' {
  1595. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1596. start_mark, "did not find expected digit or '.' character")
  1597. }
  1598. skip(parser)
  1599. // Consume the minor version number.
  1600. if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) {
  1601. return false
  1602. }
  1603. return true
  1604. }
  1605. const max_number_length = 2
  1606. // Scan the version number of VERSION-DIRECTIVE.
  1607. //
  1608. // Scope:
  1609. // %YAML 1.1 # a comment \n
  1610. // ^
  1611. // %YAML 1.1 # a comment \n
  1612. // ^
  1613. func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool {
  1614. // Repeat while the next character is digit.
  1615. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1616. return false
  1617. }
  1618. var value, length int8
  1619. for is_digit(parser.buffer, parser.buffer_pos) {
  1620. // Check if the number is too long.
  1621. length++
  1622. if length > max_number_length {
  1623. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1624. start_mark, "found extremely long version number")
  1625. }
  1626. value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos))
  1627. skip(parser)
  1628. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1629. return false
  1630. }
  1631. }
  1632. // Check if the number was present.
  1633. if length == 0 {
  1634. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1635. start_mark, "did not find expected version number")
  1636. }
  1637. *number = value
  1638. return true
  1639. }
  1640. // Scan the value of a TAG-DIRECTIVE token.
  1641. //
  1642. // Scope:
  1643. // %TAG !yaml! tag:yaml.org,2002: \n
  1644. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1645. //
  1646. func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool {
  1647. var handle_value, prefix_value []byte
  1648. // Eat whitespaces.
  1649. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1650. return false
  1651. }
  1652. for is_blank(parser.buffer, parser.buffer_pos) {
  1653. skip(parser)
  1654. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1655. return false
  1656. }
  1657. }
  1658. // Scan a handle.
  1659. if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) {
  1660. return false
  1661. }
  1662. // Expect a whitespace.
  1663. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1664. return false
  1665. }
  1666. if !is_blank(parser.buffer, parser.buffer_pos) {
  1667. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1668. start_mark, "did not find expected whitespace")
  1669. return false
  1670. }
  1671. // Eat whitespaces.
  1672. for is_blank(parser.buffer, parser.buffer_pos) {
  1673. skip(parser)
  1674. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1675. return false
  1676. }
  1677. }
  1678. // Scan a prefix.
  1679. if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) {
  1680. return false
  1681. }
  1682. // Expect a whitespace or line break.
  1683. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1684. return false
  1685. }
  1686. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1687. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1688. start_mark, "did not find expected whitespace or line break")
  1689. return false
  1690. }
  1691. *handle = handle_value
  1692. *prefix = prefix_value
  1693. return true
  1694. }
  1695. func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool {
  1696. var s []byte
  1697. // Eat the indicator character.
  1698. start_mark := parser.mark
  1699. skip(parser)
  1700. // Consume the value.
  1701. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1702. return false
  1703. }
  1704. for is_alpha(parser.buffer, parser.buffer_pos) {
  1705. s = read(parser, s)
  1706. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1707. return false
  1708. }
  1709. }
  1710. end_mark := parser.mark
  1711. /*
  1712. * Check if length of the anchor is greater than 0 and it is followed by
  1713. * a whitespace character or one of the indicators:
  1714. *
  1715. * '?', ':', ',', ']', '}', '%', '@', '`'.
  1716. */
  1717. if len(s) == 0 ||
  1718. !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' ||
  1719. parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' ||
  1720. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' ||
  1721. parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' ||
  1722. parser.buffer[parser.buffer_pos] == '`') {
  1723. context := "while scanning an alias"
  1724. if typ == yaml_ANCHOR_TOKEN {
  1725. context = "while scanning an anchor"
  1726. }
  1727. yaml_parser_set_scanner_error(parser, context, start_mark,
  1728. "did not find expected alphabetic or numeric character")
  1729. return false
  1730. }
  1731. // Create a token.
  1732. *token = yaml_token_t{
  1733. typ: typ,
  1734. start_mark: start_mark,
  1735. end_mark: end_mark,
  1736. value: s,
  1737. }
  1738. return true
  1739. }
  1740. /*
  1741. * Scan a TAG token.
  1742. */
  1743. func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool {
  1744. var handle, suffix []byte
  1745. start_mark := parser.mark
  1746. // Check if the tag is in the canonical form.
  1747. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1748. return false
  1749. }
  1750. if parser.buffer[parser.buffer_pos+1] == '<' {
  1751. // Keep the handle as ''
  1752. // Eat '!<'
  1753. skip(parser)
  1754. skip(parser)
  1755. // Consume the tag value.
  1756. if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
  1757. return false
  1758. }
  1759. // Check for '>' and eat it.
  1760. if parser.buffer[parser.buffer_pos] != '>' {
  1761. yaml_parser_set_scanner_error(parser, "while scanning a tag",
  1762. start_mark, "did not find the expected '>'")
  1763. return false
  1764. }
  1765. skip(parser)
  1766. } else {
  1767. // The tag has either the '!suffix' or the '!handle!suffix' form.
  1768. // First, try to scan a handle.
  1769. if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) {
  1770. return false
  1771. }
  1772. // Check if it is, indeed, handle.
  1773. if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' {
  1774. // Scan the suffix now.
  1775. if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
  1776. return false
  1777. }
  1778. } else {
  1779. // It wasn't a handle after all. Scan the rest of the tag.
  1780. if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) {
  1781. return false
  1782. }
  1783. // Set the handle to '!'.
  1784. handle = []byte{'!'}
  1785. // A special case: the '!' tag. Set the handle to '' and the
  1786. // suffix to '!'.
  1787. if len(suffix) == 0 {
  1788. handle, suffix = suffix, handle
  1789. }
  1790. }
  1791. }
  1792. // Check the character which ends the tag.
  1793. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1794. return false
  1795. }
  1796. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1797. yaml_parser_set_scanner_error(parser, "while scanning a tag",
  1798. start_mark, "did not find expected whitespace or line break")
  1799. return false
  1800. }
  1801. end_mark := parser.mark
  1802. // Create a token.
  1803. *token = yaml_token_t{
  1804. typ: yaml_TAG_TOKEN,
  1805. start_mark: start_mark,
  1806. end_mark: end_mark,
  1807. value: handle,
  1808. suffix: suffix,
  1809. }
  1810. return true
  1811. }
  1812. // Scan a tag handle.
  1813. func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool {
  1814. // Check the initial '!' character.
  1815. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1816. return false
  1817. }
  1818. if parser.buffer[parser.buffer_pos] != '!' {
  1819. yaml_parser_set_scanner_tag_error(parser, directive,
  1820. start_mark, "did not find expected '!'")
  1821. return false
  1822. }
  1823. var s []byte
  1824. // Copy the '!' character.
  1825. s = read(parser, s)
  1826. // Copy all subsequent alphabetical and numerical characters.
  1827. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1828. return false
  1829. }
  1830. for is_alpha(parser.buffer, parser.buffer_pos) {
  1831. s = read(parser, s)
  1832. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1833. return false
  1834. }
  1835. }
  1836. // Check if the trailing character is '!' and copy it.
  1837. if parser.buffer[parser.buffer_pos] == '!' {
  1838. s = read(parser, s)
  1839. } else {
  1840. // It's either the '!' tag or not really a tag handle. If it's a %TAG
  1841. // directive, it's an error. If it's a tag token, it must be a part of URI.
  1842. if directive && string(s) != "!" {
  1843. yaml_parser_set_scanner_tag_error(parser, directive,
  1844. start_mark, "did not find expected '!'")
  1845. return false
  1846. }
  1847. }
  1848. *handle = s
  1849. return true
  1850. }
  1851. // Scan a tag.
  1852. func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {
  1853. //size_t length = head ? strlen((char *)head) : 0
  1854. var s []byte
  1855. hasTag := len(head) > 0
  1856. // Copy the head if needed.
  1857. //
  1858. // Note that we don't copy the leading '!' character.
  1859. if len(head) > 1 {
  1860. s = append(s, head[1:]...)
  1861. }
  1862. // Scan the tag.
  1863. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1864. return false
  1865. }
  1866. // The set of characters that may appear in URI is as follows:
  1867. //
  1868. // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
  1869. // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
  1870. // '%'.
  1871. // [Go] TODO Convert this into more reasonable logic.
  1872. for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' ||
  1873. parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' ||
  1874. parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' ||
  1875. parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' ||
  1876. parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' ||
  1877. parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' ||
  1878. parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' ||
  1879. parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' ||
  1880. parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' ||
  1881. parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' ||
  1882. parser.buffer[parser.buffer_pos] == '%' {
  1883. // Check if it is a URI-escape sequence.
  1884. if parser.buffer[parser.buffer_pos] == '%' {
  1885. if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {
  1886. return false
  1887. }
  1888. } else {
  1889. s = read(parser, s)
  1890. }
  1891. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1892. return false
  1893. }
  1894. hasTag = true
  1895. }
  1896. if !hasTag {
  1897. yaml_parser_set_scanner_tag_error(parser, directive,
  1898. start_mark, "did not find expected tag URI")
  1899. return false
  1900. }
  1901. *uri = s
  1902. return true
  1903. }
  1904. // Decode an URI-escape sequence corresponding to a single UTF-8 character.
  1905. func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {
  1906. // Decode the required number of characters.
  1907. w := 1024
  1908. for w > 0 {
  1909. // Check for a URI-escaped octet.
  1910. if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
  1911. return false
  1912. }
  1913. if !(parser.buffer[parser.buffer_pos] == '%' &&
  1914. is_hex(parser.buffer, parser.buffer_pos+1) &&
  1915. is_hex(parser.buffer, parser.buffer_pos+2)) {
  1916. return yaml_parser_set_scanner_tag_error(parser, directive,
  1917. start_mark, "did not find URI escaped octet")
  1918. }
  1919. // Get the octet.
  1920. octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))
  1921. // If it is the leading octet, determine the length of the UTF-8 sequence.
  1922. if w == 1024 {
  1923. w = width(octet)
  1924. if w == 0 {
  1925. return yaml_parser_set_scanner_tag_error(parser, directive,
  1926. start_mark, "found an incorrect leading UTF-8 octet")
  1927. }
  1928. } else {
  1929. // Check if the trailing octet is correct.
  1930. if octet&0xC0 != 0x80 {
  1931. return yaml_parser_set_scanner_tag_error(parser, directive,
  1932. start_mark, "found an incorrect trailing UTF-8 octet")
  1933. }
  1934. }
  1935. // Copy the octet and move the pointers.
  1936. *s = append(*s, octet)
  1937. skip(parser)
  1938. skip(parser)
  1939. skip(parser)
  1940. w--
  1941. }
  1942. return true
  1943. }
// Scan a block scalar.
//
// The scanner is positioned on the '|' or '>' indicator. The function reads
// the optional chomping ('+' or '-') and indentation (1-9) indicators in
// either order, the rest of the header line, and then the scalar content,
// and stores a SCALAR token in *token.
//
// literal selects the style: when true line breaks are kept as they are
// (yaml_LITERAL_SCALAR_STYLE); when false they are folded
// (yaml_FOLDED_SCALAR_STYLE).
//
// It returns false when the buffer cannot be refilled, when a helper fails,
// or when a scanner error is recorded (indentation indicator '0', or
// something other than a comment or line break after the header).
func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
	// Eat the indicator '|' or '>'.
	start_mark := parser.mark
	skip(parser)

	// Scan the additional block scalar indicators.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// Check for a chomping indicator.
	//
	// chomping is +1 for '+', -1 for '-', and stays 0 when no indicator is
	// given. increment is the explicit indentation indicator, or 0 when none
	// is given.
	var chomping, increment int
	if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
		// Set the chomping method and eat the indicator.
		if parser.buffer[parser.buffer_pos] == '+' {
			chomping = +1
		} else {
			chomping = -1
		}
		skip(parser)

		// Check for an indentation indicator.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if is_digit(parser.buffer, parser.buffer_pos) {
			// Check that the indentation is greater than 0.
			if parser.buffer[parser.buffer_pos] == '0' {
				yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
					start_mark, "found an indentation indicator equal to 0")
				return false
			}

			// Get the indentation level and eat the indicator.
			increment = as_digit(parser.buffer, parser.buffer_pos)
			skip(parser)
		}

	} else if is_digit(parser.buffer, parser.buffer_pos) {
		// Do the same as above, but in the opposite order.

		if parser.buffer[parser.buffer_pos] == '0' {
			yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found an indentation indicator equal to 0")
			return false
		}
		increment = as_digit(parser.buffer, parser.buffer_pos)
		skip(parser)

		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
			if parser.buffer[parser.buffer_pos] == '+' {
				chomping = +1
			} else {
				chomping = -1
			}
			skip(parser)
		}
	}

	// Eat whitespaces and comments to the end of the line.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	for is_blank(parser.buffer, parser.buffer_pos) {
		skip(parser)
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
	}
	if parser.buffer[parser.buffer_pos] == '#' {
		// TODO Test this and then re-enable it.
		//if !yaml_parser_scan_line_comment(parser, start_mark) {
		//	return false
		//}

		// Until then the comment text is skipped and discarded.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			skip(parser)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
	}

	// Check if we are at the end of the line.
	if !is_breakz(parser.buffer, parser.buffer_pos) {
		yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
			start_mark, "did not find expected comment or line break")
		return false
	}

	// Eat a line break.
	if is_break(parser.buffer, parser.buffer_pos) {
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		skip_line(parser)
	}

	end_mark := parser.mark

	// Set the indentation level if it was specified: the increment is
	// relative to the current parser indent when that is non-negative, and
	// absolute otherwise. indent stays 0 when no indicator was given, which
	// asks yaml_parser_scan_block_scalar_breaks to determine it.
	var indent int
	if increment > 0 {
		if parser.indent >= 0 {
			indent = parser.indent + increment
		} else {
			indent = increment
		}
	}

	// Scan the leading line breaks and determine the indentation level if needed.
	//
	// s accumulates the scalar value. leading_break holds the line break
	// that ended the previous content line; trailing_breaks holds the breaks
	// of the empty lines that followed it.
	var s, leading_break, trailing_breaks []byte
	if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
		return false
	}

	// Scan the block scalar content.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	// leading_blank: the previous content line began with a blank.
	// trailing_blank: the current content line begins with a blank.
	var leading_blank, trailing_blank bool
	// Each iteration consumes one content line. The loop ends at the end of
	// the input or at the first line that is not at the scalar's indentation.
	for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
		// We are at the beginning of a non-empty line.

		// Is it a trailing whitespace?
		trailing_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Check if we need to fold the leading line break.
		if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
			// Do we need to join the lines by space?
			// With empty lines in between, those breaks are emitted below
			// and no space is added.
			if len(trailing_breaks) == 0 {
				s = append(s, ' ')
			}
		} else {
			s = append(s, leading_break...)
		}
		leading_break = leading_break[:0]

		// Append the remaining line breaks.
		s = append(s, trailing_breaks...)
		trailing_breaks = trailing_breaks[:0]

		// Is it a leading whitespace?
		leading_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Consume the current line.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			s = read(parser, s)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}

		leading_break = read_line(parser, leading_break)

		// Eat the following indentation spaces and line breaks.
		if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
			return false
		}
	}

	// Chomp the tail.
	//
	// chomping == -1 drops the final line break and the trailing empty lines;
	// chomping == 0 keeps only the final line break; chomping == +1 keeps
	// the final line break and the trailing empty lines.
	if chomping != -1 {
		s = append(s, leading_break...)
	}
	if chomping == 1 {
		s = append(s, trailing_breaks...)
	}

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_LITERAL_SCALAR_STYLE,
	}
	if !literal {
		token.style = yaml_FOLDED_SCALAR_STYLE
	}
	return true
}
  2110. // Scan indentation spaces and line breaks for a block scalar. Determine the
  2111. // indentation level if needed.
  2112. func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
  2113. *end_mark = parser.mark
  2114. // Eat the indentation spaces and line breaks.
  2115. max_indent := 0
  2116. for {
  2117. // Eat the indentation spaces.
  2118. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2119. return false
  2120. }
  2121. for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
  2122. skip(parser)
  2123. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2124. return false
  2125. }
  2126. }
  2127. if parser.mark.column > max_indent {
  2128. max_indent = parser.mark.column
  2129. }
  2130. // Check for a tab character messing the indentation.
  2131. if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
  2132. return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  2133. start_mark, "found a tab character where an indentation space is expected")
  2134. }
  2135. // Have we found a non-empty line?
  2136. if !is_break(parser.buffer, parser.buffer_pos) {
  2137. break
  2138. }
  2139. // Consume the line break.
  2140. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2141. return false
  2142. }
  2143. // [Go] Should really be returning breaks instead.
  2144. *breaks = read_line(parser, *breaks)
  2145. *end_mark = parser.mark
  2146. }
  2147. // Determine the indentation level if needed.
  2148. if *indent == 0 {
  2149. *indent = max_indent
  2150. if *indent < parser.indent+1 {
  2151. *indent = parser.indent + 1
  2152. }
  2153. if *indent < 1 {
  2154. *indent = 1
  2155. }
  2156. }
  2157. return true
  2158. }
  2159. // Scan a quoted scalar.
  2160. func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {
  2161. // Eat the left quote.
  2162. start_mark := parser.mark
  2163. skip(parser)
  2164. // Consume the content of the quoted scalar.
  2165. var s, leading_break, trailing_breaks, whitespaces []byte
  2166. for {
  2167. // Check that there are no document indicators at the beginning of the line.
  2168. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  2169. return false
  2170. }
  2171. if parser.mark.column == 0 &&
  2172. ((parser.buffer[parser.buffer_pos+0] == '-' &&
  2173. parser.buffer[parser.buffer_pos+1] == '-' &&
  2174. parser.buffer[parser.buffer_pos+2] == '-') ||
  2175. (parser.buffer[parser.buffer_pos+0] == '.' &&
  2176. parser.buffer[parser.buffer_pos+1] == '.' &&
  2177. parser.buffer[parser.buffer_pos+2] == '.')) &&
  2178. is_blankz(parser.buffer, parser.buffer_pos+3) {
  2179. yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
  2180. start_mark, "found unexpected document indicator")
  2181. return false
  2182. }
  2183. // Check for EOF.
  2184. if is_z(parser.buffer, parser.buffer_pos) {
  2185. yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
  2186. start_mark, "found unexpected end of stream")
  2187. return false
  2188. }
  2189. // Consume non-blank characters.
  2190. leading_blanks := false
  2191. for !is_blankz(parser.buffer, parser.buffer_pos) {
  2192. if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {
  2193. // Is is an escaped single quote.
  2194. s = append(s, '\'')
  2195. skip(parser)
  2196. skip(parser)
  2197. } else if single && parser.buffer[parser.buffer_pos] == '\'' {
  2198. // It is a right single quote.
  2199. break
  2200. } else if !single && parser.buffer[parser.buffer_pos] == '"' {
  2201. // It is a right double quote.
  2202. break
  2203. } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {
  2204. // It is an escaped line break.
  2205. if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
  2206. return false
  2207. }
  2208. skip(parser)
  2209. skip_line(parser)
  2210. leading_blanks = true
  2211. break
  2212. } else if !single && parser.buffer[parser.buffer_pos] == '\\' {
  2213. // It is an escape sequence.
  2214. code_length := 0
  2215. // Check the escape character.
  2216. switch parser.buffer[parser.buffer_pos+1] {
  2217. case '0':
  2218. s = append(s, 0)
  2219. case 'a':
  2220. s = append(s, '\x07')
  2221. case 'b':
  2222. s = append(s, '\x08')
  2223. case 't', '\t':
  2224. s = append(s, '\x09')
  2225. case 'n':
  2226. s = append(s, '\x0A')
  2227. case 'v':
  2228. s = append(s, '\x0B')
  2229. case 'f':
  2230. s = append(s, '\x0C')
  2231. case 'r':
  2232. s = append(s, '\x0D')
  2233. case 'e':
  2234. s = append(s, '\x1B')
  2235. case ' ':
  2236. s = append(s, '\x20')
  2237. case '"':
  2238. s = append(s, '"')
  2239. case '\'':
  2240. s = append(s, '\'')
  2241. case '\\':
  2242. s = append(s, '\\')
  2243. case 'N': // NEL (#x85)
  2244. s = append(s, '\xC2')
  2245. s = append(s, '\x85')
  2246. case '_': // #xA0
  2247. s = append(s, '\xC2')
  2248. s = append(s, '\xA0')
  2249. case 'L': // LS (#x2028)
  2250. s = append(s, '\xE2')
  2251. s = append(s, '\x80')
  2252. s = append(s, '\xA8')
  2253. case 'P': // PS (#x2029)
  2254. s = append(s, '\xE2')
  2255. s = append(s, '\x80')
  2256. s = append(s, '\xA9')
  2257. case 'x':
  2258. code_length = 2
  2259. case 'u':
  2260. code_length = 4
  2261. case 'U':
  2262. code_length = 8
  2263. default:
  2264. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2265. start_mark, "found unknown escape character")
  2266. return false
  2267. }
  2268. skip(parser)
  2269. skip(parser)
  2270. // Consume an arbitrary escape code.
  2271. if code_length > 0 {
  2272. var value int
  2273. // Scan the character value.
  2274. if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {
  2275. return false
  2276. }
  2277. for k := 0; k < code_length; k++ {
  2278. if !is_hex(parser.buffer, parser.buffer_pos+k) {
  2279. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2280. start_mark, "did not find expected hexdecimal number")
  2281. return false
  2282. }
  2283. value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k)
  2284. }
  2285. // Check the value and write the character.
  2286. if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {
  2287. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2288. start_mark, "found invalid Unicode character escape code")
  2289. return false
  2290. }
  2291. if value <= 0x7F {
  2292. s = append(s, byte(value))
  2293. } else if value <= 0x7FF {
  2294. s = append(s, byte(0xC0+(value>>6)))
  2295. s = append(s, byte(0x80+(value&0x3F)))
  2296. } else if value <= 0xFFFF {
  2297. s = append(s, byte(0xE0+(value>>12)))
  2298. s = append(s, byte(0x80+((value>>6)&0x3F)))
  2299. s = append(s, byte(0x80+(value&0x3F)))
  2300. } else {
  2301. s = append(s, byte(0xF0+(value>>18)))
  2302. s = append(s, byte(0x80+((value>>12)&0x3F)))
  2303. s = append(s, byte(0x80+((value>>6)&0x3F)))
  2304. s = append(s, byte(0x80+(value&0x3F)))
  2305. }
  2306. // Advance the pointer.
  2307. for k := 0; k < code_length; k++ {
  2308. skip(parser)
  2309. }
  2310. }
  2311. } else {
  2312. // It is a non-escaped non-blank character.
  2313. s = read(parser, s)
  2314. }
  2315. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2316. return false
  2317. }
  2318. }
  2319. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2320. return false
  2321. }
  2322. // Check if we are at the end of the scalar.
  2323. if single {
  2324. if parser.buffer[parser.buffer_pos] == '\'' {
  2325. break
  2326. }
  2327. } else {
  2328. if parser.buffer[parser.buffer_pos] == '"' {
  2329. break
  2330. }
  2331. }
  2332. // Consume blank characters.
  2333. for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
  2334. if is_blank(parser.buffer, parser.buffer_pos) {
  2335. // Consume a space or a tab character.
  2336. if !leading_blanks {
  2337. whitespaces = read(parser, whitespaces)
  2338. } else {
  2339. skip(parser)
  2340. }
  2341. } else {
  2342. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2343. return false
  2344. }
  2345. // Check if it is a first line break.
  2346. if !leading_blanks {
  2347. whitespaces = whitespaces[:0]
  2348. leading_break = read_line(parser, leading_break)
  2349. leading_blanks = true
  2350. } else {
  2351. trailing_breaks = read_line(parser, trailing_breaks)
  2352. }
  2353. }
  2354. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2355. return false
  2356. }
  2357. }
  2358. // Join the whitespaces or fold line breaks.
  2359. if leading_blanks {
  2360. // Do we need to fold line breaks?
  2361. if len(leading_break) > 0 && leading_break[0] == '\n' {
  2362. if len(trailing_breaks) == 0 {
  2363. s = append(s, ' ')
  2364. } else {
  2365. s = append(s, trailing_breaks...)
  2366. }
  2367. } else {
  2368. s = append(s, leading_break...)
  2369. s = append(s, trailing_breaks...)
  2370. }
  2371. trailing_breaks = trailing_breaks[:0]
  2372. leading_break = leading_break[:0]
  2373. } else {
  2374. s = append(s, whitespaces...)
  2375. whitespaces = whitespaces[:0]
  2376. }
  2377. }
  2378. // Eat the right quote.
  2379. skip(parser)
  2380. end_mark := parser.mark
  2381. // Create a token.
  2382. *token = yaml_token_t{
  2383. typ: yaml_SCALAR_TOKEN,
  2384. start_mark: start_mark,
  2385. end_mark: end_mark,
  2386. value: s,
  2387. style: yaml_SINGLE_QUOTED_SCALAR_STYLE,
  2388. }
  2389. if !single {
  2390. token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
  2391. }
  2392. return true
  2393. }
  2394. // Scan a plain scalar.
  2395. func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {
  2396. var s, leading_break, trailing_breaks, whitespaces []byte
  2397. var leading_blanks bool
  2398. var indent = parser.indent + 1
  2399. start_mark := parser.mark
  2400. end_mark := parser.mark
  2401. // Consume the content of the plain scalar.
  2402. for {
  2403. // Check for a document indicator.
  2404. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  2405. return false
  2406. }
  2407. if parser.mark.column == 0 &&
  2408. ((parser.buffer[parser.buffer_pos+0] == '-' &&
  2409. parser.buffer[parser.buffer_pos+1] == '-' &&
  2410. parser.buffer[parser.buffer_pos+2] == '-') ||
  2411. (parser.buffer[parser.buffer_pos+0] == '.' &&
  2412. parser.buffer[parser.buffer_pos+1] == '.' &&
  2413. parser.buffer[parser.buffer_pos+2] == '.')) &&
  2414. is_blankz(parser.buffer, parser.buffer_pos+3) {
  2415. break
  2416. }
  2417. // Check for a comment.
  2418. if parser.buffer[parser.buffer_pos] == '#' {
  2419. break
  2420. }
  2421. // Consume non-blank characters.
  2422. for !is_blankz(parser.buffer, parser.buffer_pos) {
  2423. // Check for indicators that may end a plain scalar.
  2424. if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||
  2425. (parser.flow_level > 0 &&
  2426. (parser.buffer[parser.buffer_pos] == ',' ||
  2427. parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||
  2428. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
  2429. parser.buffer[parser.buffer_pos] == '}')) {
  2430. break
  2431. }
  2432. // Check if we need to join whitespaces and breaks.
  2433. if leading_blanks || len(whitespaces) > 0 {
  2434. if leading_blanks {
  2435. // Do we need to fold line breaks?
  2436. if leading_break[0] == '\n' {
  2437. if len(trailing_breaks) == 0 {
  2438. s = append(s, ' ')
  2439. } else {
  2440. s = append(s, trailing_breaks...)
  2441. }
  2442. } else {
  2443. s = append(s, leading_break...)
  2444. s = append(s, trailing_breaks...)
  2445. }
  2446. trailing_breaks = trailing_breaks[:0]
  2447. leading_break = leading_break[:0]
  2448. leading_blanks = false
  2449. } else {
  2450. s = append(s, whitespaces...)
  2451. whitespaces = whitespaces[:0]
  2452. }
  2453. }
  2454. // Copy the character.
  2455. s = read(parser, s)
  2456. end_mark = parser.mark
  2457. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2458. return false
  2459. }
  2460. }
  2461. // Is it the end?
  2462. if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {
  2463. break
  2464. }
  2465. // Consume blank characters.
  2466. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2467. return false
  2468. }
  2469. for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
  2470. if is_blank(parser.buffer, parser.buffer_pos) {
  2471. // Check for tab characters that abuse indentation.
  2472. if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {
  2473. yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
  2474. start_mark, "found a tab character that violates indentation")
  2475. return false
  2476. }
  2477. // Consume a space or a tab character.
  2478. if !leading_blanks {
  2479. whitespaces = read(parser, whitespaces)
  2480. } else {
  2481. skip(parser)
  2482. }
  2483. } else {
  2484. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2485. return false
  2486. }
  2487. // Check if it is a first line break.
  2488. if !leading_blanks {
  2489. whitespaces = whitespaces[:0]
  2490. leading_break = read_line(parser, leading_break)
  2491. leading_blanks = true
  2492. } else {
  2493. trailing_breaks = read_line(parser, trailing_breaks)
  2494. }
  2495. }
  2496. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2497. return false
  2498. }
  2499. }
  2500. // Check indentation level.
  2501. if parser.flow_level == 0 && parser.mark.column < indent {
  2502. break
  2503. }
  2504. }
  2505. // Create a token.
  2506. *token = yaml_token_t{
  2507. typ: yaml_SCALAR_TOKEN,
  2508. start_mark: start_mark,
  2509. end_mark: end_mark,
  2510. value: s,
  2511. style: yaml_PLAIN_SCALAR_STYLE,
  2512. }
  2513. // Note that we change the 'simple_key_allowed' flag.
  2514. if leading_blanks {
  2515. parser.simple_key_allowed = true
  2516. }
  2517. return true
  2518. }
  2519. func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {
  2520. if parser.newlines > 0 {
  2521. return true
  2522. }
  2523. var start_mark yaml_mark_t
  2524. var text []byte
  2525. for peek := 0; peek < 512; peek++ {
  2526. if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
  2527. break
  2528. }
  2529. if is_blank(parser.buffer, parser.buffer_pos+peek) {
  2530. continue
  2531. }
  2532. if parser.buffer[parser.buffer_pos+peek] == '#' {
  2533. seen := parser.mark.index+peek
  2534. for {
  2535. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2536. return false
  2537. }
  2538. if is_breakz(parser.buffer, parser.buffer_pos) {
  2539. if parser.mark.index >= seen {
  2540. break
  2541. }
  2542. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2543. return false
  2544. }
  2545. skip_line(parser)
  2546. } else {
  2547. if parser.mark.index >= seen {
  2548. if len(text) == 0 {
  2549. start_mark = parser.mark
  2550. }
  2551. text = append(text, parser.buffer[parser.buffer_pos])
  2552. }
  2553. skip(parser)
  2554. }
  2555. }
  2556. }
  2557. break
  2558. }
  2559. if len(text) > 0 {
  2560. parser.comments = append(parser.comments, yaml_comment_t{
  2561. token_mark: token_mark,
  2562. start_mark: start_mark,
  2563. line: text,
  2564. })
  2565. }
  2566. return true
  2567. }
  2568. func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
  2569. token := parser.tokens[len(parser.tokens)-1]
  2570. if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
  2571. token = parser.tokens[len(parser.tokens)-2]
  2572. }
  2573. var token_mark = token.start_mark
  2574. var start_mark yaml_mark_t
  2575. var recent_empty = false
  2576. var first_empty = parser.newlines <= 1
  2577. var line = parser.mark.line
  2578. var column = parser.mark.column
  2579. var text []byte
  2580. // The foot line is the place where a comment must start to
  2581. // still be considered as a foot of the prior content.
  2582. // If there's some content in the currently parsed line, then
  2583. // the foot is the line below it.
  2584. var foot_line = -1
  2585. if scan_mark.line > 0 {
  2586. foot_line = parser.mark.line-parser.newlines+1
  2587. if parser.newlines == 0 && parser.mark.column > 1 {
  2588. foot_line++
  2589. }
  2590. }
  2591. var peek = 0
  2592. for ; peek < 512; peek++ {
  2593. if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
  2594. break
  2595. }
  2596. column++
  2597. if is_blank(parser.buffer, parser.buffer_pos+peek) {
  2598. continue
  2599. }
  2600. c := parser.buffer[parser.buffer_pos+peek]
  2601. if is_breakz(parser.buffer, parser.buffer_pos+peek) || parser.flow_level > 0 && (c == ']' || c == '}') {
  2602. // Got line break or terminator.
  2603. if !recent_empty {
  2604. if first_empty && (start_mark.line == foot_line || start_mark.column-1 < parser.indent) {
  2605. // This is the first empty line and there were no empty lines before,
  2606. // so this initial part of the comment is a foot of the prior token
  2607. // instead of being a head for the following one. Split it up.
  2608. if len(text) > 0 {
  2609. if start_mark.column-1 < parser.indent {
  2610. // If dedented it's unrelated to the prior token.
  2611. token_mark = start_mark
  2612. }
  2613. parser.comments = append(parser.comments, yaml_comment_t{
  2614. scan_mark: scan_mark,
  2615. token_mark: token_mark,
  2616. start_mark: start_mark,
  2617. end_mark: yaml_mark_t{parser.mark.index + peek, line, column},
  2618. foot: text,
  2619. })
  2620. scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2621. token_mark = scan_mark
  2622. text = nil
  2623. }
  2624. } else {
  2625. if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
  2626. text = append(text, '\n')
  2627. }
  2628. }
  2629. }
  2630. if !is_break(parser.buffer, parser.buffer_pos+peek) {
  2631. break
  2632. }
  2633. first_empty = false
  2634. recent_empty = true
  2635. column = 0
  2636. line++
  2637. continue
  2638. }
  2639. if len(text) > 0 && column < parser.indent+1 && column != start_mark.column {
  2640. // The comment at the different indentation is a foot of the
  2641. // preceding data rather than a head of the upcoming one.
  2642. parser.comments = append(parser.comments, yaml_comment_t{
  2643. scan_mark: scan_mark,
  2644. token_mark: token_mark,
  2645. start_mark: start_mark,
  2646. end_mark: yaml_mark_t{parser.mark.index + peek, line, column},
  2647. foot: text,
  2648. })
  2649. scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2650. token_mark = scan_mark
  2651. text = nil
  2652. }
  2653. if parser.buffer[parser.buffer_pos+peek] != '#' {
  2654. break
  2655. }
  2656. if len(text) == 0 {
  2657. start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2658. } else {
  2659. text = append(text, '\n')
  2660. }
  2661. recent_empty = false
  2662. // Consume until after the consumed comment line.
  2663. seen := parser.mark.index+peek
  2664. for {
  2665. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2666. return false
  2667. }
  2668. if is_breakz(parser.buffer, parser.buffer_pos) {
  2669. if parser.mark.index >= seen {
  2670. break
  2671. }
  2672. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2673. return false
  2674. }
  2675. skip_line(parser)
  2676. } else {
  2677. if parser.mark.index >= seen {
  2678. text = append(text, parser.buffer[parser.buffer_pos])
  2679. }
  2680. skip(parser)
  2681. }
  2682. }
  2683. peek = 0
  2684. column = 0
  2685. line = parser.mark.line
  2686. }
  2687. if len(text) > 0 {
  2688. parser.comments = append(parser.comments, yaml_comment_t{
  2689. scan_mark: scan_mark,
  2690. token_mark: start_mark,
  2691. start_mark: start_mark,
  2692. end_mark: yaml_mark_t{parser.mark.index + peek - 1, line, column},
  2693. head: text,
  2694. })
  2695. }
  2696. return true
  2697. }