10. 完整的语法规范

这是完整的 Python 语法规范,直接提取自用于生成 CPython 解析器的语法 (参见 Grammar/python.gram)。 这里显示的版本省略了有关代码生成和错误恢复的细节。

所用的标记法是 EBNF 和 PEG 的混合体。 特别地,& 后跟一个符号、形符或带括号的分组来表示正向前视(即要求执行匹配但不会被消耗掉),而 ! 表示负向前视(即要求 _不_ 执行匹配)。 我们使用 | 分隔符来表示 PEG 的“有序选择” (在传统 PEG 语法中则写为 /)。

  1. # PEG grammar for Python
  2. file: [statements] ENDMARKER
  3. interactive: statement_newline
  4. eval: expressions NEWLINE* ENDMARKER
  5. func_type: '(' [type_expressions] ')' '->' expression NEWLINE* ENDMARKER
  6. fstring: star_expressions
  7. # type_expressions allow */** but ignore them
  8. type_expressions:
  9. | ','.expression+ ',' '*' expression ',' '**' expression
  10. | ','.expression+ ',' '*' expression
  11. | ','.expression+ ',' '**' expression
  12. | '*' expression ',' '**' expression
  13. | '*' expression
  14. | '**' expression
  15. | ','.expression+
  16. statements: statement+
  17. statement: compound_stmt | simple_stmt
  18. statement_newline:
  19. | compound_stmt NEWLINE
  20. | simple_stmt
  21. | NEWLINE
  22. | ENDMARKER
  23. simple_stmt:
  24. | small_stmt !';' NEWLINE # Not needed, there for speedup
  25. | ';'.small_stmt+ [';'] NEWLINE
  26. # NOTE: assignment MUST precede expression, else parsing a simple assignment
  27. # will throw a SyntaxError.
  28. small_stmt:
  29. | assignment
  30. | star_expressions
  31. | return_stmt
  32. | import_stmt
  33. | raise_stmt
  34. | 'pass'
  35. | del_stmt
  36. | yield_stmt
  37. | assert_stmt
  38. | 'break'
  39. | 'continue'
  40. | global_stmt
  41. | nonlocal_stmt
  42. compound_stmt:
  43. | function_def
  44. | if_stmt
  45. | class_def
  46. | with_stmt
  47. | for_stmt
  48. | try_stmt
  49. | while_stmt
  50. # NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield'
  51. assignment:
  52. | NAME ':' expression ['=' annotated_rhs ]
  53. | ('(' single_target ')'
  54. | single_subscript_attribute_target) ':' expression ['=' annotated_rhs ]
  55. | (star_targets '=' )+ (yield_expr | star_expressions) !'=' [TYPE_COMMENT]
  56. | single_target augassign ~ (yield_expr | star_expressions)
  57. augassign:
  58. | '+='
  59. | '-='
  60. | '*='
  61. | '@='
  62. | '/='
  63. | '%='
  64. | '&='
  65. | '|='
  66. | '^='
  67. | '<<='
  68. | '>>='
  69. | '**='
  70. | '//='
  71. global_stmt: 'global' ','.NAME+
  72. nonlocal_stmt: 'nonlocal' ','.NAME+
  73. yield_stmt: yield_expr
  74. assert_stmt: 'assert' expression [',' expression ]
  75. del_stmt:
  76. | 'del' del_targets &(';' | NEWLINE)
  77. import_stmt: import_name | import_from
  78. import_name: 'import' dotted_as_names
  79. # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
  80. import_from:
  81. | 'from' ('.' | '...')* dotted_name 'import' import_from_targets
  82. | 'from' ('.' | '...')+ 'import' import_from_targets
  83. import_from_targets:
  84. | '(' import_from_as_names [','] ')'
  85. | import_from_as_names !','
  86. | '*'
  87. import_from_as_names:
  88. | ','.import_from_as_name+
  89. import_from_as_name:
  90. | NAME ['as' NAME ]
  91. dotted_as_names:
  92. | ','.dotted_as_name+
  93. dotted_as_name:
  94. | dotted_name ['as' NAME ]
  95. dotted_name:
  96. | dotted_name '.' NAME
  97. | NAME
  98. if_stmt:
  99. | 'if' named_expression ':' block elif_stmt
  100. | 'if' named_expression ':' block [else_block]
  101. elif_stmt:
  102. | 'elif' named_expression ':' block elif_stmt
  103. | 'elif' named_expression ':' block [else_block]
  104. else_block: 'else' ':' block
  105. while_stmt:
  106. | 'while' named_expression ':' block [else_block]
  107. for_stmt:
  108. | 'for' star_targets 'in' ~ star_expressions ':' [TYPE_COMMENT] block [else_block]
  109. | ASYNC 'for' star_targets 'in' ~ star_expressions ':' [TYPE_COMMENT] block [else_block]
  110. with_stmt:
  111. | 'with' '(' ','.with_item+ ','? ')' ':' block
  112. | 'with' ','.with_item+ ':' [TYPE_COMMENT] block
  113. | ASYNC 'with' '(' ','.with_item+ ','? ')' ':' block
  114. | ASYNC 'with' ','.with_item+ ':' [TYPE_COMMENT] block
  115. with_item:
  116. | expression 'as' star_target &(',' | ')' | ':')
  117. | expression
  118. try_stmt:
  119. | 'try' ':' block finally_block
  120. | 'try' ':' block except_block+ [else_block] [finally_block]
  121. except_block:
  122. | 'except' expression ['as' NAME ] ':' block
  123. | 'except' ':' block
  124. finally_block: 'finally' ':' block
  125. return_stmt:
  126. | 'return' [star_expressions]
  127. raise_stmt:
  128. | 'raise' expression ['from' expression ]
  129. | 'raise'
  130. function_def:
  131. | decorators function_def_raw
  132. | function_def_raw
  133. function_def_raw:
  134. | 'def' NAME '(' [params] ')' ['->' expression ] ':' [func_type_comment] block
  135. | ASYNC 'def' NAME '(' [params] ')' ['->' expression ] ':' [func_type_comment] block
  136. func_type_comment:
  137. | NEWLINE TYPE_COMMENT &(NEWLINE INDENT) # Must be followed by indented block
  138. | TYPE_COMMENT
  139. params:
  140. | parameters
  141. parameters:
  142. | slash_no_default param_no_default* param_with_default* [star_etc]
  143. | slash_with_default param_with_default* [star_etc]
  144. | param_no_default+ param_with_default* [star_etc]
  145. | param_with_default+ [star_etc]
  146. | star_etc
  147. # Some duplication here because we can't write (',' | &')'),
  148. # which is because we don't support empty alternatives (yet).
  149. #
  150. slash_no_default:
  151. | param_no_default+ '/' ','
  152. | param_no_default+ '/' &')'
  153. slash_with_default:
  154. | param_no_default* param_with_default+ '/' ','
  155. | param_no_default* param_with_default+ '/' &')'
  156. star_etc:
  157. | '*' param_no_default param_maybe_default* [kwds]
  158. | '*' ',' param_maybe_default+ [kwds]
  159. | kwds
  160. kwds: '**' param_no_default
  161. # One parameter. This *includes* a following comma and type comment.
  162. #
  163. # There are three styles:
  164. # - No default
  165. # - With default
  166. # - Maybe with default
  167. #
  168. # There are two alternative forms of each, to deal with type comments:
  169. # - Ends in a comma followed by an optional type comment
  170. # - No comma, optional type comment, must be followed by close paren
  171. # The latter form is for a final parameter without trailing comma.
  172. #
  173. param_no_default:
  174. | param ',' TYPE_COMMENT?
  175. | param TYPE_COMMENT? &')'
  176. param_with_default:
  177. | param default ',' TYPE_COMMENT?
  178. | param default TYPE_COMMENT? &')'
  179. param_maybe_default:
  180. | param default? ',' TYPE_COMMENT?
  181. | param default? TYPE_COMMENT? &')'
  182. param: NAME annotation?
  183. annotation: ':' expression
  184. default: '=' expression
  185. decorators: ('@' named_expression NEWLINE )+
  186. class_def:
  187. | decorators class_def_raw
  188. | class_def_raw
  189. class_def_raw:
  190. | 'class' NAME ['(' [arguments] ')' ] ':' block
  191. block:
  192. | NEWLINE INDENT statements DEDENT
  193. | simple_stmt
  194. expressions_list: ','.star_expression+ [',']
  195. star_expressions:
  196. | star_expression (',' star_expression )+ [',']
  197. | star_expression ','
  198. | star_expression
  199. star_expression:
  200. | '*' bitwise_or
  201. | expression
  202. star_named_expressions: ','.star_named_expression+ [',']
  203. star_named_expression:
  204. | '*' bitwise_or
  205. | named_expression
  206. named_expression:
  207. | NAME ':=' ~ expression
  208. | expression !':='
  209. annotated_rhs: yield_expr | star_expressions
  210. expressions:
  211. | expression (',' expression )+ [',']
  212. | expression ','
  213. | expression
  214. expression:
  215. | disjunction 'if' disjunction 'else' expression
  216. | disjunction
  217. | lambdef
  218. lambdef:
  219. | 'lambda' [lambda_params] ':' expression
  220. lambda_params:
  221. | lambda_parameters
  222. # lambda_parameters etc. duplicates parameters but without annotations
  223. # or type comments, and if there's no comma after a parameter, we expect
  224. # a colon, not a close parenthesis. (For more, see parameters above.)
  225. #
  226. lambda_parameters:
  227. | lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* [lambda_star_etc]
  228. | lambda_slash_with_default lambda_param_with_default* [lambda_star_etc]
  229. | lambda_param_no_default+ lambda_param_with_default* [lambda_star_etc]
  230. | lambda_param_with_default+ [lambda_star_etc]
  231. | lambda_star_etc
  232. lambda_slash_no_default:
  233. | lambda_param_no_default+ '/' ','
  234. | lambda_param_no_default+ '/' &':'
  235. lambda_slash_with_default:
  236. | lambda_param_no_default* lambda_param_with_default+ '/' ','
  237. | lambda_param_no_default* lambda_param_with_default+ '/' &':'
  238. lambda_star_etc:
  239. | '*' lambda_param_no_default lambda_param_maybe_default* [lambda_kwds]
  240. | '*' ',' lambda_param_maybe_default+ [lambda_kwds]
  241. | lambda_kwds
  242. lambda_kwds: '**' lambda_param_no_default
  243. lambda_param_no_default:
  244. | lambda_param ','
  245. | lambda_param &':'
  246. lambda_param_with_default:
  247. | lambda_param default ','
  248. | lambda_param default &':'
  249. lambda_param_maybe_default:
  250. | lambda_param default? ','
  251. | lambda_param default? &':'
  252. lambda_param: NAME
  253. disjunction:
  254. | conjunction ('or' conjunction )+
  255. | conjunction
  256. conjunction:
  257. | inversion ('and' inversion )+
  258. | inversion
  259. inversion:
  260. | 'not' inversion
  261. | comparison
  262. comparison:
  263. | bitwise_or compare_op_bitwise_or_pair+
  264. | bitwise_or
  265. compare_op_bitwise_or_pair:
  266. | eq_bitwise_or
  267. | noteq_bitwise_or
  268. | lte_bitwise_or
  269. | lt_bitwise_or
  270. | gte_bitwise_or
  271. | gt_bitwise_or
  272. | notin_bitwise_or
  273. | in_bitwise_or
  274. | isnot_bitwise_or
  275. | is_bitwise_or
  276. eq_bitwise_or: '==' bitwise_or
  277. noteq_bitwise_or:
  278. | ('!=' ) bitwise_or
  279. lte_bitwise_or: '<=' bitwise_or
  280. lt_bitwise_or: '<' bitwise_or
  281. gte_bitwise_or: '>=' bitwise_or
  282. gt_bitwise_or: '>' bitwise_or
  283. notin_bitwise_or: 'not' 'in' bitwise_or
  284. in_bitwise_or: 'in' bitwise_or
  285. isnot_bitwise_or: 'is' 'not' bitwise_or
  286. is_bitwise_or: 'is' bitwise_or
  287. bitwise_or:
  288. | bitwise_or '|' bitwise_xor
  289. | bitwise_xor
  290. bitwise_xor:
  291. | bitwise_xor '^' bitwise_and
  292. | bitwise_and
  293. bitwise_and:
  294. | bitwise_and '&' shift_expr
  295. | shift_expr
  296. shift_expr:
  297. | shift_expr '<<' sum
  298. | shift_expr '>>' sum
  299. | sum
  300. sum:
  301. | sum '+' term
  302. | sum '-' term
  303. | term
  304. term:
  305. | term '*' factor
  306. | term '/' factor
  307. | term '//' factor
  308. | term '%' factor
  309. | term '@' factor
  310. | factor
  311. factor:
  312. | '+' factor
  313. | '-' factor
  314. | '~' factor
  315. | power
  316. power:
  317. | await_primary '**' factor
  318. | await_primary
  319. await_primary:
  320. | AWAIT primary
  321. | primary
  322. primary:
  323. | primary '.' NAME
  324. | primary genexp
  325. | primary '(' [arguments] ')'
  326. | primary '[' slices ']'
  327. | atom
  328. slices:
  329. | slice !','
  330. | ','.slice+ [',']
  331. slice:
  332. | [expression] ':' [expression] [':' [expression] ]
  333. | expression
  334. atom:
  335. | NAME
  336. | 'True'
  337. | 'False'
  338. | 'None'
  339. | '__peg_parser__'
  340. | strings
  341. | NUMBER
  342. | (tuple | group | genexp)
  343. | (list | listcomp)
  344. | (dict | set | dictcomp | setcomp)
  345. | '...'
  346. strings: STRING+
  347. list:
  348. | '[' [star_named_expressions] ']'
  349. listcomp:
  350. | '[' named_expression ~ for_if_clauses ']'
  351. tuple:
  352. | '(' [star_named_expression ',' [star_named_expressions] ] ')'
  353. group:
  354. | '(' (yield_expr | named_expression) ')'
  355. genexp:
  356. | '(' expression ~ for_if_clauses ')'
  357. set: '{' expressions_list '}'
  358. setcomp:
  359. | '{' expression ~ for_if_clauses '}'
  360. dict:
  361. | '{' [double_starred_kvpairs] '}'
  362. dictcomp:
  363. | '{' kvpair for_if_clauses '}'
  364. double_starred_kvpairs: ','.double_starred_kvpair+ [',']
  365. double_starred_kvpair:
  366. | '**' bitwise_or
  367. | kvpair
  368. kvpair: expression ':' expression
  369. for_if_clauses:
  370. | for_if_clause+
  371. for_if_clause:
  372. | ASYNC 'for' star_targets 'in' ~ disjunction ('if' disjunction )*
  373. | 'for' star_targets 'in' ~ disjunction ('if' disjunction )*
  374. yield_expr:
  375. | 'yield' 'from' expression
  376. | 'yield' [star_expressions]
  377. arguments:
  378. | args [','] &')'
  379. args:
  380. | ','.(starred_expression | named_expression !'=')+ [',' kwargs ]
  381. | kwargs
  382. kwargs:
  383. | ','.kwarg_or_starred+ ',' ','.kwarg_or_double_starred+
  384. | ','.kwarg_or_starred+
  385. | ','.kwarg_or_double_starred+
  386. starred_expression:
  387. | '*' expression
  388. kwarg_or_starred:
  389. | NAME '=' expression
  390. | starred_expression
  391. kwarg_or_double_starred:
  392. | NAME '=' expression
  393. | '**' expression
  394. # NOTE: star_targets may contain *bitwise_or, targets may not.
  395. star_targets:
  396. | star_target !','
  397. | star_target (',' star_target )* [',']
  398. star_targets_seq: ','.star_target+ [',']
  399. star_target:
  400. | '*' (!'*' star_target)
  401. | t_primary '.' NAME !t_lookahead
  402. | t_primary '[' slices ']' !t_lookahead
  403. | star_atom
  404. star_atom:
  405. | NAME
  406. | '(' star_target ')'
  407. | '(' [star_targets_seq] ')'
  408. | '[' [star_targets_seq] ']'
  409. single_target:
  410. | single_subscript_attribute_target
  411. | NAME
  412. | '(' single_target ')'
  413. single_subscript_attribute_target:
  414. | t_primary '.' NAME !t_lookahead
  415. | t_primary '[' slices ']' !t_lookahead
  416. del_targets: ','.del_target+ [',']
  417. del_target:
  418. | t_primary '.' NAME !t_lookahead
  419. | t_primary '[' slices ']' !t_lookahead
  420. | del_t_atom
  421. del_t_atom:
  422. | NAME
  423. | '(' del_target ')'
  424. | '(' [del_targets] ')'
  425. | '[' [del_targets] ']'
  426. targets: ','.target+ [',']
  427. target:
  428. | t_primary '.' NAME !t_lookahead
  429. | t_primary '[' slices ']' !t_lookahead
  430. | t_atom
  431. t_primary:
  432. | t_primary '.' NAME &t_lookahead
  433. | t_primary '[' slices ']' &t_lookahead
  434. | t_primary genexp &t_lookahead
  435. | t_primary '(' [arguments] ')' &t_lookahead
  436. | atom &t_lookahead
  437. t_lookahead: '(' | '[' | '.'
  438. t_atom:
  439. | NAME
  440. | '(' target ')'
  441. | '(' [targets] ')'
  442. | '[' [targets] ']'