machine.go.rl 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. package urn
  2. import (
  3. "fmt"
  4. scimschema "github.com/leodido/go-urn/scim/schema"
  5. )
  // Error message templates used by the parser actions.
  //
  // Every template takes exactly one argument: the column (byte offset in the
  // input) at which the problem was detected.
  var (
  	// Errors of the base (RFC 2141) machine.
  	errPrefix         = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]"
  	errIdentifier     = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its beginning) [col %d]"
  	errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]"
  	errNoUrnWithinID  = "expecting the identifier to not contain the \"urn\" reserved string [col %d]"
  	errHex            = "expecting the percent encoded chars to be well-formed (%%alnum{2}) [col %d]"

  	// Errors of the SCIM machine.
  	errSCIMNamespace       = "expecting the SCIM namespace identifier (ietf:params:scim) [col %d]"
  	errSCIMType            = "expecting a correct SCIM type (schemas, api, param) [col %d]"
  	errSCIMName            = "expecting one or more alnum char in the SCIM name part [col %d]"
  	errSCIMOther           = "expecting a well-formed other SCIM part [col %d]"
  	errSCIMOtherIncomplete = "expecting a not empty SCIM other part after colon [col %d]"

  	// Errors of the RFC 8141 machine.
  	err8141InformalID      = "informal URN namespace must be in the form urn-[1-9][0-9] [col %d]"
  	err8141SpecificString  = "expecting the specific string to contain alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] not in first position) chars [col %d]"
  	err8141Identifier      = "expecting the identifier to be a string with (length 2 to 32 chars) containing alnum (or dashes) not starting or ending with a dash [col %d]"
  	err8141RComponentStart = "expecting only one r-component (starting with the ?+ sequence) [col %d]"
  	err8141QComponentStart = "expecting only one q-component (starting with the ?= sequence) [col %d]"
  	err8141MalformedRComp  = "expecting a non-empty r-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]"
  	err8141MalformedQComp  = "expecting a non-empty q-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]"
  )
  25. %%{
  26. machine urn;
  # Ragel specification of the URN parsers. Inside action bodies, m is the
  # *machine receiver and output is the *URN being built; both are provided by
  # the Parse method that hosts the generated code.
  27. # unsigned alphabet
  28. alphtype uint8;
  # mark: remember the current position as the start of the token being
  # scanned; m.text() returns the bytes from this position up to m.p.
  29. action mark {
  30. m.pb = m.p
  31. }
  # tolower: attached to the uppercase characters of a percent-encoded sequence
  # (see the hex rule); records their offset relative to the last mark.
  32. action tolower {
  33. // List of positions in the buffer to later lowercase
  34. output.tolower = append(output.tolower, m.p - m.pb)
  35. }
  # set_pre: store the marked text as the URN prefix.
  36. action set_pre {
  37. output.prefix = string(m.text())
  38. }
  # throw_pre_urn_err: entering action of the pre rule. Because pre is also used
  # while scanning the namespace identifier (see nid_not_urn), the check below
  # only triggers once a prefix has already been stored.
  39. action throw_pre_urn_err {
  40. if m.parsingMode != RFC8141Only {
  41. // Throw an error when:
  42. // - we are entering here matching the prefix in the namespace identifier part
  43. // - looking ahead (3 chars) we find a colon (58 is ASCII ':')
  44. if pos := m.p + 3; pos < m.pe && m.data[pos] == 58 && output.prefix != "" {
  45. m.err = fmt.Errorf(errNoUrnWithinID, pos)
  46. fhold;
  47. fgoto fail;
  48. }
  49. }
  50. }
  # set_nid: store the marked text as the namespace identifier.
  51. action set_nid {
  52. output.ID = string(m.text())
  53. }
  # set_nss: store the marked text as the specific string, then build the
  # normalized variant. The input buffer itself (m.data is the slice passed to
  # Parse) is modified in place for a moment and restored right after.
  54. action set_nss {
  55. output.SS = string(m.text())
  56. // Iterate upper letters lowering them
  // (adding 32 turns an ASCII uppercase letter into its lowercase form)
  57. for _, i := range output.tolower {
  58. m.data[m.pb+i] = m.data[m.pb+i] + 32
  59. }
  60. output.norm = string(m.text())
  61. // Revert the buffer to the original
  62. for _, i := range output.tolower {
  63. m.data[m.pb+i] = m.data[m.pb+i] - 32
  64. }
  65. }
  # err_pre, err_nid, err_nss, err_urn: record the matching error using the
  # current position as column, hold the current character (fhold) and jump to
  # the fail machine.
  66. action err_pre {
  67. m.err = fmt.Errorf(errPrefix, m.p)
  68. fhold;
  69. fgoto fail;
  70. }
  71. action err_nid {
  72. m.err = fmt.Errorf(errIdentifier, m.p)
  73. fhold;
  74. fgoto fail;
  75. }
  76. action err_nss {
  77. m.err = fmt.Errorf(errSpecificString, m.p)
  78. fhold;
  79. fgoto fail;
  80. }
  81. action err_urn {
  82. m.err = fmt.Errorf(errNoUrnWithinID, m.p)
  83. fhold;
  84. fgoto fail;
  85. }
  # err_hex: same as above, but only when the mode is RFC2141Only or
  # RFC8141Only; in any other mode this action does nothing.
  86. action err_hex {
  87. if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only {
  88. m.err = fmt.Errorf(errHex, m.p)
  89. fhold;
  90. fgoto fail;
  91. }
  92. }
  # base_type: tag the result as an RFC 2141 URN.
  93. action base_type {
  94. output.kind = RFC2141;
  95. }
  # pre: the three letters "urn" in any case; marks on entry and stores the
  # prefix on leaving.
  96. pre = ([uU] @err(err_pre) [rR] @err(err_pre) [nN] @err(err_pre)) >mark >throw_pre_urn_err %set_pre;
  # nid: one alnum followed by up to 31 alnum or '-' characters.
  # NOTE(review): this accepts 1 to 32 characters while the errIdentifier
  # message says 1..31 - confirm which one is intended.
  97. nid = (alnum >mark (alnum | '-'){0,31}) $err(err_nid) %set_nid;
  # hex: '%' followed by exactly two alnum characters; uppercase ones are
  # recorded through the tolower action.
  98. hex = '%' (digit | lower | upper >tolower){2} $err(err_hex);
  # sss: a single non-escaped character allowed in the specific string.
  99. sss = (alnum | [()+,\-.:=@;$_!*']);
  # nss: one or more plain or percent-encoded characters.
  100. nss = (sss | hex)+ $err(err_nss);
  # nid_not_urn: a namespace identifier that is not the reserved "urn" string.
  101. nid_not_urn = (nid - pre %err(err_urn));
  # urn: prefix ':' identifier ':' specific string; base_type runs at end of input.
  102. urn = pre ':' @err(err_pre) (nid_not_urn ':' nss >mark %set_nss) %eof(base_type);
  103. ### SCIM BEG
  # err_scim_nid, err_scim_type, err_scim_name: record the matching SCIM error
  # using the current position as column, hold the current character (fhold)
  # and jump to the fail machine.
  104. action err_scim_nid {
  105. m.err = fmt.Errorf(errSCIMNamespace, m.p)
  106. fhold;
  107. fgoto fail;
  108. }
  109. action err_scim_type {
  110. m.err = fmt.Errorf(errSCIMType, m.p)
  111. fhold;
  112. fgoto fail;
  113. }
  114. action err_scim_name {
  115. m.err = fmt.Errorf(errSCIMName, m.p)
  116. fhold;
  117. fgoto fail;
  118. }
  # err_scim_other: when the whole input has been consumed (m.p == m.pe) the
  # "other" part is reported as incomplete at the previous column; otherwise it
  # is reported as malformed at the current column.
  119. action err_scim_other {
  120. if m.p == m.pe {
  121. m.err = fmt.Errorf(errSCIMOtherIncomplete, m.p-1)
  122. } else {
  123. m.err = fmt.Errorf(errSCIMOther, m.p)
  124. }
  125. fhold;
  126. fgoto fail;
  127. }
  # scim_type (action): tag the result as an RFC 7643 (SCIM) URN.
  128. action scim_type {
  129. output.kind = RFC7643;
  130. }
  # create_scim: allocate the SCIM part of the result.
  131. action create_scim {
  132. output.scim = &SCIM{};
  133. }
  # set_scim_type: convert the marked text through scimschema.TypeFromString.
  134. action set_scim_type {
  135. output.scim.Type = scimschema.TypeFromString(string(m.text()))
  136. }
  # mark_scim_name / set_scim_name: the name is sliced using its own start
  # position (output.scim.pos), leaving m.pb untouched.
  137. action mark_scim_name {
  138. output.scim.pos = m.p
  139. }
  140. action set_scim_name {
  141. output.scim.Name = string(m.data[output.scim.pos:m.p])
  142. }
  # mark_scim_other / set_scim_other: same approach, reusing output.scim.pos.
  143. action mark_scim_other {
  144. output.scim.pos = m.p
  145. }
  146. action set_scim_other {
  147. output.scim.Other = string(m.data[output.scim.pos:m.p])
  148. }
  # scim_nid: the literal namespace "ietf:params:scim".
  149. scim_nid = 'ietf:params:scim' >mark %set_nid %create_scim $err(err_scim_nid);
  # scim_other: ':' followed by one or more plain or percent-encoded characters.
  150. scim_other = ':' (sss | hex)+ >mark_scim_other %set_scim_other $err(err_scim_other);
  # scim_name: one or more alnum characters.
  151. scim_name = (alnum)+ >mark_scim_name %set_scim_name $err(err_scim_name);
  # scim_type (rule): one of the three literal type names.
  152. scim_type = ('schemas' | 'api' | 'param') >mark %set_scim_type $err(err_scim_type);
  # scim_only: entry point used for the RFC7643Only parsing mode (en_scim_only).
  153. scim_only := pre ':' @err(err_pre) (scim_nid ':' scim_type ':' scim_name scim_other? %set_nss) %eof(scim_type);
  154. ### SCIM END
  155. ### 8141 BEG
  # err_nss_8141, err_nid_8141: record the matching RFC 8141 error using the
  # current position as column, hold the current character (fhold) and jump to
  # the fail machine.
  156. action err_nss_8141 {
  157. m.err = fmt.Errorf(err8141SpecificString, m.p)
  158. fhold;
  159. fgoto fail;
  160. }
  161. action err_nid_8141 {
  162. m.err = fmt.Errorf(err8141Identifier, m.p)
  163. fhold;
  164. fgoto fail;
  165. }
  # rfc8141_type: tag the result as an RFC 8141 URN.
  166. action rfc8141_type {
  167. output.kind = RFC8141;
  168. }
  # set_r_component, set_q_component, set_f_component: store the marked text as
  # the r-, q- or f-component of the result.
  169. action set_r_component {
  170. output.rComponent = string(m.text())
  171. }
  172. action set_q_component {
  173. output.qComponent = string(m.text())
  174. }
  175. action set_f_component {
  176. output.fComponent = string(m.text())
  177. }
  # informal_nid_match: reject identifiers matched by the informal_id rule.
  # NOTE(review): fhold is issued before m.p is read here, unlike the other
  # error actions; if fhold moves m.p back, the reported column is one lower
  # than elsewhere - confirm this is intended.
  178. action informal_nid_match {
  179. fhold;
  180. m.err = fmt.Errorf(err8141InformalID, m.p);
  181. fgoto fail;
  182. }
  # mark_r_start: fail when a "?+" sequence was already seen, otherwise remember
  # that one has been seen.
  183. action mark_r_start {
  184. if output.rStart {
  185. m.err = fmt.Errorf(err8141RComponentStart, m.p)
  186. fhold;
  187. fgoto fail;
  188. }
  189. output.rStart = true
  190. }
  # mark_q_start: same check for the "?=" sequence.
  191. action mark_q_start {
  192. if output.qStart {
  193. m.err = fmt.Errorf(err8141QComponentStart, m.p)
  194. fhold;
  195. fgoto fail;
  196. }
  197. output.qStart = true
  198. }
  # err_malformed_r_component, err_malformed_q_component: record the matching
  # error, hold the current character and jump to the fail machine.
  199. action err_malformed_r_component {
  200. m.err = fmt.Errorf(err8141MalformedRComp, m.p)
  201. fhold;
  202. fgoto fail;
  203. }
  204. action err_malformed_q_component {
  205. m.err = fmt.Errorf(err8141MalformedQComp, m.p)
  206. fhold;
  207. fgoto fail;
  208. }
  # pchar: an sss character, '~', '&' or a percent-encoded sequence.
  209. pchar = (sss | '~' | '&' | hex);
  # component: a pchar followed by any number of pchar, '/' or '?'.
  210. component = pchar (pchar | '/' | '?')*;
  # r_start / r_component: the "?+" introducer and the r-component after it.
  211. r_start = ('?+') %mark_r_start;
  212. r_component = r_start <: (r_start | component)+ $err(err_malformed_r_component) >mark %set_r_component;
  # q_start / q_component: the "?=" introducer and the q-component after it.
  213. q_start = ('?=') %mark_q_start;
  214. q_component = q_start <: (q_start | component)+ $err(err_malformed_q_component) >mark %set_q_component;
  # rq_components: an r-component optionally followed by a q-component, or a
  # q-component alone.
  215. rq_components = (r_component :>> q_component? | q_component);
  # fragment / f_component: '#' followed by any number of pchar, '/' or '?'.
  216. fragment = (pchar | '/' | '?')*;
  217. f_component = '#' fragment >mark %set_f_component;
  # nss_rfc8141: a pchar followed by any number of pchar or '/'.
  218. nss_rfc8141 = (pchar >mark (pchar | '/')*) $err(err_nss_8141) %set_nss;
  # nid_rfc8141: 2 to 32 characters, alnum at both ends, alnum or '-' in between.
  219. nid_rfc8141 = (alnum >mark (alnum | '-'){0,30} alnum) $err(err_nid_8141) %set_nid;
  # informal_id: "urn-" followed by a letter or '0'.
  # NOTE(review): the range A-z also spans the ASCII punctuation between 'Z'
  # and 'a'; A-Z was probably intended - confirm.
  220. informal_id = pre ('-' [a-zA-z0] %to(informal_nid_match));
  # nid_rfc8141_not_urn: an RFC 8141 identifier with informal_id matches removed.
  221. nid_rfc8141_not_urn = (nid_rfc8141 - informal_id?);
  # rfc8141_only: entry point used for the RFC8141Only parsing mode (en_rfc8141_only).
  222. rfc8141_only := pre ':' @err(err_pre) nid_rfc8141_not_urn ':' nss_rfc8141 rq_components? f_component? %eof(rfc8141_type);
  223. ### 8141 END
  # fail: entry point the error actions jump to; it consumes any bytes other
  # than CR/LF, and its own error action jumps back to main.
  224. fail := (any - [\n\r])* @err{ fgoto main; };
  # main: default entry point (en_main), the RFC 2141 machine.
  225. main := urn;
  226. }%%
  // Emit the static data of the machine. The Go code below refers to the
  // generated names without a machine prefix (en_main, en_fail, first_final, ...).
  227. %% write data noerror noprefix;
  228. // Machine is the interface representing the FSM
  229. type Machine interface {
  230. Error() error
  231. Parse(input []byte) (*URN, error)
  232. WithParsingMode(ParsingMode)
  233. }
  234. type machine struct {
  235. data []byte
  236. cs int
  237. p, pe, eof, pb int
  238. err error
  239. startParsingAt int
  240. parsingMode ParsingMode
  241. parsingModeSet bool
  242. }
  243. // NewMachine creates a new FSM able to parse RFC 2141 strings.
  244. func NewMachine(options ...Option) Machine {
  245. m := &machine{
  246. parsingModeSet: false,
  247. }
  248. for _, o := range options {
  249. o(m)
  250. }
  251. // Set default parsing mode
  252. if !m.parsingModeSet {
  253. m.WithParsingMode(DefaultParsingMode)
  254. }
  255. %% access m.;
  256. %% variable p m.p;
  257. %% variable pe m.pe;
  258. %% variable eof m.eof;
  259. %% variable data m.data;
  260. return m
  261. }
  262. // Err returns the error that occurred on the last call to Parse.
  263. //
  264. // If the result is nil, then the line was parsed successfully.
  265. func (m *machine) Error() error {
  266. return m.err
  267. }
  268. func (m *machine) text() []byte {
  269. return m.data[m.pb:m.p]
  270. }
  271. // Parse parses the input byte array as a RFC 2141 or RFC7643 string.
  272. func (m *machine) Parse(input []byte) (*URN, error) {
  273. m.data = input
  274. m.p = 0
  275. m.pb = 0
  276. m.pe = len(input)
  277. m.eof = len(input)
  278. m.err = nil
  279. m.cs = m.startParsingAt
  280. output := &URN{
  281. tolower: []int{},
  282. }
  283. %% write exec;
  284. if m.cs < first_final || m.cs == en_fail {
  285. return nil, m.err
  286. }
  287. return output, nil
  288. }
  289. func (m *machine) WithParsingMode(x ParsingMode) {
  290. m.parsingMode = x
  291. switch m.parsingMode {
  292. case RFC2141Only:
  293. m.startParsingAt = en_main
  294. case RFC8141Only:
  295. m.startParsingAt = en_rfc8141_only
  296. case RFC7643Only:
  297. m.startParsingAt = en_scim_only
  298. }
  299. m.parsingModeSet = true
  300. }