diff --git a/MANIFEST b/MANIFEST index 8006cd7..628e586 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4141,6 +4141,7 @@ t/op/gv.t See if typeglobs work t/op/hashassign.t See if hash assignments work t/op/hash.t See if the complexity attackers are repelled t/op/hashwarn.t See if warnings for bad hash assignments work +t/op/heredoc.t See if heredoc edge and corner cases work t/op/inccode.t See if coderefs work in @INC t/op/inccode-tie.t See if tie to @INC works t/op/incfilter.t See if the source filters in coderef-in-@INC work diff --git a/pod/perlop.pod b/pod/perlop.pod index f52f79f..0170202 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1709,10 +1709,14 @@ the terminating string are the value of the item. The terminating string may be either an identifier (a word), or some quoted text. An unquoted identifier works like double quotes. -There may not be a space between the C<< << >> and the identifier, -unless the identifier is explicitly quoted. (If you put a space it -will be treated as a null identifier, which is valid, and matches the -first empty line.) The terminating string must appear by itself + +Prior to perl version 5.12, any space between +the C<< << >> and an unquoted alphanumeric identifier was treated +as the null identifier, which is valid, and matches the +first empty line, but now space is allowed before unquoted identifiers +as long as they are not infix keywords. + +The terminating string must appear by itself (unquoted and with no surrounding whitespace) on the terminating line. If the terminating string is quoted, the type of quotes used determine @@ -1746,6 +1750,9 @@ This is the only form of quoting in perl where there is no need to worry about escaping content, something that code generators can and do make good use of. +A bare terminator prefixed with a backslash also triggers this +quoting mode. + =item Backticks The content of the here doc is treated just as it would be if the @@ -1798,8 +1805,8 @@ you'll need to remove leading whitespace from each line manually: down from the door where it began. FINIS -If you use a here-doc within a delimited construct, such as in C, -the quoted material must come on the lines following the final delimiter. +When using a here-doc within a delimited construct, such as in C, +the quoted material starts on the line following the final delimiter. So instead of s/this/<. - Additionally, the quoting rules for the end of string identifier are not related to Perl's quoting rules -- C, C, and the like are not supported in place of C<''> and C<"">, and the only interpolation is for diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index e5ca400..caddeb5 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -141,14 +141,14 @@ Use of comma-less variable list is deprecated at - line 5. Use of comma-less variable list is deprecated at - line 5. ######## # toke.c -use warnings 'deprecated' ; +use warnings 'syntax' ; $a = <<; -no warnings 'deprecated' ; +no warnings 'syntax' ; $a = <<; EXPECT -Use of bare << to mean <<"" is deprecated at - line 3. + << interpreted as heredoc with null terminator at - line 3. ######## # toke.c use warnings 'syntax' ; diff --git a/t/test.pl b/t/test.pl index 32c4a37..e96d2c7 100644 --- a/t/test.pl +++ b/t/test.pl @@ -660,8 +660,8 @@ sub _fresh_perl { my($prog, $resolve, $runperl_args, $name) = @_; $runperl_args ||= {}; - $runperl_args->{progfile} = $tmpfile; - $runperl_args->{stderr} = 1; + $runperl_args->{progfile} ||= $tmpfile; + $runperl_args->{stderr} = 1 unless exists $runperl_args->{stderr}; open TEST, ">$tmpfile" or die "Cannot open $tmpfile: $!"; @@ -698,12 +698,6 @@ sub _fresh_perl { } my $pass = $resolve->($results); - unless ($pass) { - _diag "# PROG: \n$prog\n"; - _diag "# EXPECTED:\n", $resolve->(), "\n"; - _diag "# GOT:\n$results\n"; - _diag "# STATUS: $status\n"; - } # Use the first line of the program as a name if none was given unless( $name ) { @@ -712,6 +706,15 @@ sub _fresh_perl { } _ok($pass, _where(), "fresh_perl - $name"); + + unless ($pass) { + _diag "# PROG: \n$prog\n"; + _diag "# EXPECTED:\n", $resolve->(), "\n"; + _diag "# GOT:\n$results\n"; + _diag "# STATUS: $status\n"; + } + + return $pass; } # diff --git a/toke.c b/toke.c index a15dca6..0342ced 100644 --- a/toke.c +++ b/toke.c @@ -11240,6 +11240,29 @@ S_scan_trans(pTHX_ char *start) return s; } +/* and cmp eq if for foreach ge gt le lt ne or unless until while x */ +#define LeftBindingBareword(p) (\ +(*p == 'x' && !isALNUM_lazy_if(&p[1],UTF)) ||\ +(*p == 'e' && p[1] == 'q' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'i' && p[1] == 'f' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'g' && p[1] == 'e' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'g' && p[1] == 't' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'l' && p[1] == 'e' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'l' && p[1] == 't' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'n' && p[1] == 'e' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'o' && p[1] == 'r' && !isALNUM_lazy_if(&p[2],UTF)) ||\ +(*p == 'a' && p[1] == 'n' && p[2] == 'd' && !isALNUM_lazy_if(&p[3],UTF)) ||\ +(*p == 'c' && p[1] == 'm' && p[2] == 'p' && !isALNUM_lazy_if(&p[3],UTF)) ||\ +(*p == 'f' && p[1] == 'o' && p[2] == 'r' && (!isALNUM_lazy_if(&p[3],UTF) ||\ + (p[3] == 'e' && p[4] == 'a' && p[5] == 'c' && p[6] == 'h' && \ + !isALNUM_lazy_if(&p[3],UTF)))) ||\ +(*p == 'u' && p[1] == 'n' && ( \ + (p[2] == 't' && p[3]=='i' && p[4]=='l' && !isALNUM_lazy_if(&p[3],UTF)) || \ + (p[2] == 'l' && p[3]=='e' && p[4]=='s' && p[5] == 's' && \ + !isALNUM_lazy_if(&p[6],UTF)))) ||\ +(*p == 'w' && p[1] == 'h' && p[2] == 'i' && p[3]== 'l' && p[4] == 'e' && \ + !isALNUM_lazy_if(&p[5],UTF)) \ +) STATIC char * S_scan_heredoc(pTHX_ register char *s) @@ -11268,25 +11291,29 @@ S_scan_heredoc(pTHX_ register char *s) d = PL_tokenbuf; e = PL_tokenbuf + sizeof PL_tokenbuf - 1; if (!outer) - *d++ = '\n'; + *d++ = '\n'; peek = s; while (SPACE_OR_TAB(*peek)) - peek++; + peek++; if (*peek == '`' || *peek == '\'' || *peek =='"') { - s = peek; - term = *s++; - s = delimcpy(d, e, s, PL_bufend, term, &len); - d += len; - if (s < PL_bufend) - s++; + s = peek; + term = *s++; + s = delimcpy(d, e, s, PL_bufend, term, &len); + d += len; + if (s < PL_bufend) + s++; } else { + if (*peek == '\\' || ( isALNUM_lazy_if(peek,UTF) && !LeftBindingBareword(peek))) + s = peek; if (*s == '\\') + /* <<\FOO is equivalent to <<'FOO' */ s++, term = '\''; else term = '"'; - if (!isALNUM_lazy_if(s,UTF)) - deprecate_old("bare << to mean <<\"\""); + if (!isALNUM_lazy_if(s,UTF) && ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + " << interpreted as heredoc with null terminator"); for (; isALNUM_lazy_if(s,UTF); s++) { if (d < e) *d++ = *s; @@ -11448,8 +11475,30 @@ S_scan_heredoc(pTHX_ register char *s) #endif if (!outer || !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { - CopLINE_set(PL_curcop, (line_t)PL_multi_start); - missingterm(PL_tokenbuf); + I32 len_without; + /* in other words, we are out of input */ + /* maybe the string terminator didn't have a newline? */ + sv_catpvn(tmpstr,"\n",1); /* add a newline */ + len_without = SvCUR(tmpstr) - len; + s = SvPVX(tmpstr) + len_without; + if ( len_without > -1 /* there has been enough data */ + && *(s-1) == '\n' /* the terminator starts at line-begin */ + && *s == term /* the terminator starts correctly */ + && memEQ(s,PL_tokenbuf,len) /* and checks out okay */ + ) + { + /* chop the quoted heredoc at the right place */ + SvCUR_set(tmpstr,len_without); + /* pretend we got the string terminator from filter_gets */ + sv_setpvn(PL_linestr,s,len); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);; + } + else + { + /* No, we have reached EOF without finding the terminator. */ + CopLINE_set(PL_curcop, (line_t)PL_multi_start); + missingterm(PL_tokenbuf); + } } #ifdef PERL_MAD stuffstart = s - SvPVX(PL_linestr);