interpret this... (phpem 2016)
TRANSCRIPT
@asgrim
Interpret this…
James Titcumb
PHPem Unconference 2016
Who is this guy?
James Titcumb
www.jamestitcumb.com
www.roave.com
www.phphants.co.uk
www.phpsouthcoast.co.uk
@asgrim
@asgrim
How PHP works
PHP code
OpCache
Execute (VM)
Lexer + Parser
Compiler
@asgrim
The PHP Lexer
zend_language_scanner.l
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
The PHP Lexer
zend_language_scanner.l
@asgrim
The PHP Lexer
zend_language_scanner.l
re2c
@asgrim
The PHP Lexer
zend_language_scanner.l
re2c
zend_language_scanner.c
@asgrim
The PHP Parser
zend_language_parser.y
@asgrim
zend_language_parser.y
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
if_stmt_without_else (B)
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
if_stmt_without_else (B)
if_stmt
@asgrim
zend_language_parser.y (PHP 7.0.10)
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
@asgrim
zend_language_parser.y (PHP 5.6.26)
T_IF parenthesis_expr { zend_do_if_cond(&$2, &$1 TSRMLS_CC); }
statement { zend_do_if_after_statement(&$1, 1 TSRMLS_CC); }
void zend_do_if_cond(const znode *cond, znode *closing_bracket_token TSRMLS_DC)
{
int if_cond_op_number = get_next_op_number(CG(active_op_array));
zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
opline->opcode = ZEND_JMPZ;
SET_NODE(opline->op1, cond);
closing_bracket_token->u.op.opline_num = if_cond_op_number;
SET_UNUSED(opline->op2);
INC_BPC(CG(active_op_array));
}
@asgrim
AST is new in PHP 7+
@asgrim
How PHP works
PHP code
OpCache
Execute (VM)
Lexer + Parser
Compiler
@asgrim
Let’s simplify!
@asgrim
First… WTF is AST?
@asgrim
AST is just a data structure
@asgrim
PHP code
<?php
echo "Hello world";
@asgrim
An AST representation
Echo statement
`-- String, value "Hello world"
@asgrim
PHP code
<?php
echo "Hello " . "world";
@asgrim
An AST representation
Echo statement
`-- Concat
    |-- Left
    |   `-- String, value "Hello "
    `-- Right
        `-- String, value "world"
@asgrim
PHP code
<?php
$a = 5;
$b = 3;
echo $a + ($b * 2);
@asgrim
An AST representation
Assign statement
|-- Variable $a
`-- Integer, value 5
Assign statement
|-- Variable $b
`-- Integer, value 3
Echo statement
`-- Add operation
    |-- Left
    |   `-- Variable $a
    `-- Right
        `-- Multiply operation
            |-- Left
            |   `-- Variable $b
            `-- Right
                `-- Integer, value 2
@asgrim
AST compilation
Statements
|-- Assign
|   |-- Variable, name: $a
|   `-- Scalar, value: (int)5
|-- Assign
|   |-- Variable, name: $b
|   `-- Scalar, value: (int)3
`-- Echo
    `-- Add op
        |-- Left operand
        |   `-- Variable, name: $a
        `-- Right operand
            `-- Multiply op
                |-- Left operand
                |   `-- Variable, name: $b
                `-- Right operand
                    `-- Scalar, value: (int)2
@asgrim
AST compilation: pre-order traversal
Statements
|-- Assign
|   |-- Variable, name: $a
|   `-- Scalar, value: (int)5
|-- Assign
|   |-- Variable, name: $b
|   `-- Scalar, value: (int)3
`-- Echo
    `-- Add op
        |-- Left operand
        |   `-- Variable, name: $a
        `-- Right operand
            `-- Multiply op
                |-- Left operand
                |   `-- Variable, name: $b
                `-- Right operand
                    `-- Scalar, value: (int)2
@asgrim
Pre-order traversal: Polish notation
Assign(Variable $a, Scalar 5)
Assign(Variable $b, Scalar 3)
Echo (
    Add(
        Variable $a,
        Multiply( $b, 2 )
    )
)
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
Operator Left operand Right operand
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
Operator Left operand Right operand
Operator Left operand Right operand
@asgrim
Reverse Polish Notation
1 2 3 * +
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
6
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
6
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
7
Any questions?
James Titcumb @asgrim