diff --git a/ChangeLog b/ChangeLog index bf0f749..913b4a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2011-07-05 John Haque + + * awk.h (Op_sub_builtin): New opcode. + (GSUB, GENSUB, AFTER_ASSIGN, LITERAL): New flags for + Op_sub_builtin. + * awkgram.y (tokentab): Change opcode to Op_sub_builtin + for sub, gsub and gensub. + (snode): Update processing of sub, gsub and gensub. + * builtin.c (do_sub, do_gsub, do_gensub): Nuke. + (sub_common): Renamed to do_sub. Relocate gensub argument + handling code from do_gensub to here; simplify the code a + little bit. + * eval.c (r_interpret): Handle Op_sub_builtin. Avoid field + re-splitting or $0 rebuilding if (g)sub target string is + a field and no substitutions were done. + * profile.c (pprint): Add case for the new opcode. + * debug.c (print_instruction): Ditto. + 2011-06-24 Arnold D. Robbins * Makefile.am (EXTRA_DIST): Add ChangeLog.0. diff --git a/awk.h b/awk.h index 25abf41..e224061 100644 --- a/awk.h +++ b/awk.h @@ -521,6 +521,7 @@ typedef enum opcodeval { Op_K_nextfile, Op_builtin, + Op_sub_builtin, /* sub, gsub and gensub */ Op_in_array, /* boolean test of membership in array */ /* function call instruction */ @@ -626,6 +627,16 @@ typedef struct exp_instruction { #define target_jmp d.di #define target_break x.xi +/* Op_sub_builtin */ +#define sub_flags d.dl +#define GSUB 0x01 /* builtin is gsub */ +#define GENSUB 0x02 /* builtin is gensub */ +#define AFTER_ASSIGN 0x04 /* (g)sub target is a field or a special var with * set_XX routine. 
+ */ +#define LITERAL 0x08 /* target is a literal string */ + + /* Op_K_exit */ #define target_end d.di #define target_atexit x.xi @@ -1181,9 +1192,7 @@ extern NODE *do_cos(int nargs); extern NODE *do_rand(int nargs); extern NODE *do_srand(int nargs); extern NODE *do_match(int nargs); -extern NODE *do_gsub(int nargs); -extern NODE *do_sub(int nargs); -extern NODE *do_gensub(int nargs); +extern NODE *do_sub(int nargs, unsigned int flags, int *num_matches); extern NODE *format_tree(const char *, size_t, NODE **, long); extern NODE *do_lshift(int nargs); extern NODE *do_rshift(int nargs); diff --git a/awkgram.c b/awkgram.c index 4edec57..ac4ceaa 100644 --- a/awkgram.c +++ b/awkgram.c @@ -2065,7 +2065,7 @@ yyreduce: { case 3: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 221 "awkgram.y" { rule = 0; @@ -2075,7 +2075,7 @@ yyreduce: case 5: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 227 "awkgram.y" { next_sourcefile(); @@ -2084,7 +2084,7 @@ yyreduce: case 6: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 231 "awkgram.y" { rule = 0; @@ -2098,7 +2098,7 @@ yyreduce: case 7: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 243 "awkgram.y" { (void) append_rule((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); @@ -2107,7 +2107,7 @@ yyreduce: case 8: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 247 "awkgram.y" { if (rule != Rule) { @@ -2123,7 +2123,7 @@ yyreduce: case 9: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 258 "awkgram.y" { can_return = FALSE; @@ -2136,7 +2136,7 @@ yyreduce: case 10: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 266 "awkgram.y" { want_source = FALSE; @@ -2146,7 +2146,7 @@ yyreduce: case 11: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 274 "awkgram.y" { if (include_source((yyvsp[(1) - (1)])) < 0) @@ -2159,35 +2159,35 @@ yyreduce: case 12: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 282 "awkgram.y" { (yyval) = NULL; } break; case 13: 
-/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 284 "awkgram.y" { (yyval) = NULL; } break; case 14: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 289 "awkgram.y" { (yyval) = NULL; rule = Rule; } break; case 15: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 291 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); rule = Rule; } break; case 16: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 293 "awkgram.y" { INSTRUCTION *tp; @@ -2218,7 +2218,7 @@ yyreduce: case 17: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 319 "awkgram.y" { static int begin_seen = 0; @@ -2234,7 +2234,7 @@ yyreduce: case 18: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 330 "awkgram.y" { static int end_seen = 0; @@ -2250,7 +2250,7 @@ yyreduce: case 19: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 341 "awkgram.y" { (yyvsp[(1) - (1)])->in_rule = rule = BEGINFILE; @@ -2261,7 +2261,7 @@ yyreduce: case 20: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 347 "awkgram.y" { (yyvsp[(1) - (1)])->in_rule = rule = ENDFILE; @@ -2272,7 +2272,7 @@ yyreduce: case 21: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 356 "awkgram.y" { if ((yyvsp[(2) - (5)]) == NULL) @@ -2284,21 +2284,21 @@ yyreduce: case 22: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 366 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 23: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 368 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 24: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 370 "awkgram.y" { yyerror(_("`%s' is a built-in function, it cannot be redefined"), @@ -2314,14 +2314,14 @@ yyreduce: case 25: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 381 "awkgram.y" { (yyval) = (yyvsp[(2) - (2)]); } break; case 28: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 391 "awkgram.y" { param_counter = 0; @@ -2331,7 +2331,7 @@ yyreduce: case 29: -/* Line 
1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 396 "awkgram.y" { NODE *t; @@ -2353,14 +2353,14 @@ yyreduce: case 30: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 420 "awkgram.y" { ++want_regexp; } break; case 31: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 422 "awkgram.y" { NODE *n, *exp; @@ -2393,21 +2393,21 @@ yyreduce: case 32: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 453 "awkgram.y" { bcfree((yyvsp[(1) - (1)])); } break; case 34: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 459 "awkgram.y" { (yyval) = NULL; } break; case 35: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 461 "awkgram.y" { if ((yyvsp[(2) - (2)]) == NULL) @@ -2425,28 +2425,28 @@ yyreduce: case 36: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 474 "awkgram.y" { (yyval) = NULL; } break; case 39: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 484 "awkgram.y" { (yyval) = NULL; } break; case 40: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 486 "awkgram.y" { (yyval) = (yyvsp[(2) - (3)]); } break; case 41: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 488 "awkgram.y" { if (do_profiling) @@ -2458,7 +2458,7 @@ yyreduce: case 42: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 495 "awkgram.y" { INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt; @@ -2553,7 +2553,7 @@ yyreduce: case 43: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 585 "awkgram.y" { /* @@ -2600,7 +2600,7 @@ yyreduce: case 44: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 627 "awkgram.y" { /* @@ -2647,7 +2647,7 @@ yyreduce: case 45: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 669 "awkgram.y" { INSTRUCTION *ip; @@ -2767,7 +2767,7 @@ regular_loop: case 46: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 784 "awkgram.y" { (yyval) = mk_for_loop((yyvsp[(1) - (12)]), (yyvsp[(3) - (12)]), (yyvsp[(6) - (12)]), (yyvsp[(9) - (12)]), (yyvsp[(12) - 
(12)])); @@ -2779,7 +2779,7 @@ regular_loop: case 47: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 791 "awkgram.y" { (yyval) = mk_for_loop((yyvsp[(1) - (11)]), (yyvsp[(3) - (11)]), (INSTRUCTION *) NULL, (yyvsp[(8) - (11)]), (yyvsp[(11) - (11)])); @@ -2791,7 +2791,7 @@ regular_loop: case 48: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 798 "awkgram.y" { if (do_profiling) @@ -2803,7 +2803,7 @@ regular_loop: case 49: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 808 "awkgram.y" { if (! break_allowed) @@ -2817,7 +2817,7 @@ regular_loop: case 50: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 817 "awkgram.y" { if (! continue_allowed) @@ -2831,7 +2831,7 @@ regular_loop: case 51: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 826 "awkgram.y" { /* if inside function (rule = 0), resolve context at run-time */ @@ -2845,7 +2845,7 @@ regular_loop: case 52: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 835 "awkgram.y" { if (do_traditional) @@ -2865,7 +2865,7 @@ regular_loop: case 53: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 850 "awkgram.y" { /* Initialize the two possible jump targets, the actual target @@ -2885,7 +2885,7 @@ regular_loop: case 54: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 865 "awkgram.y" { if (! 
can_return) @@ -2895,7 +2895,7 @@ regular_loop: case 55: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 868 "awkgram.y" { if ((yyvsp[(3) - (4)]) == NULL) { @@ -2909,14 +2909,14 @@ regular_loop: case 57: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 888 "awkgram.y" { in_print = TRUE; in_parens = 0; } break; case 58: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 889 "awkgram.y" { /* @@ -3016,14 +3016,14 @@ regular_loop: case 59: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 984 "awkgram.y" { sub_counter = 0; } break; case 60: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 985 "awkgram.y" { char *arr = (yyvsp[(2) - (4)])->lextok; @@ -3053,7 +3053,7 @@ regular_loop: case 61: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1014 "awkgram.y" { static short warned = FALSE; @@ -3077,35 +3077,35 @@ regular_loop: case 62: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1033 "awkgram.y" { (yyval) = optimize_assignment((yyvsp[(1) - (1)])); } break; case 63: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1038 "awkgram.y" { (yyval) = NULL; } break; case 64: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1040 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 65: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1045 "awkgram.y" { (yyval) = NULL; } break; case 66: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1047 "awkgram.y" { if ((yyvsp[(1) - (2)]) == NULL) @@ -3117,14 +3117,14 @@ regular_loop: case 67: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1054 "awkgram.y" { (yyval) = NULL; } break; case 68: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1059 "awkgram.y" { INSTRUCTION *casestmt = (yyvsp[(5) - (5)]); @@ -3141,7 +3141,7 @@ regular_loop: case 69: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1071 "awkgram.y" { INSTRUCTION *casestmt = (yyvsp[(4) - (4)]); @@ -3157,14 +3157,14 @@ 
regular_loop: case 70: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1085 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 71: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1087 "awkgram.y" { (yyvsp[(2) - (2)])->memory->numbr = -(force_number((yyvsp[(2) - (2)])->memory)); @@ -3175,7 +3175,7 @@ regular_loop: case 72: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1093 "awkgram.y" { bcfree((yyvsp[(1) - (2)])); @@ -3185,14 +3185,14 @@ regular_loop: case 73: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1098 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 74: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1100 "awkgram.y" { (yyvsp[(1) - (1)])->opcode = Op_push_re; @@ -3202,21 +3202,21 @@ regular_loop: case 75: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1108 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 76: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1110 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 78: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1120 "awkgram.y" { (yyval) = (yyvsp[(2) - (3)]); @@ -3225,7 +3225,7 @@ regular_loop: case 79: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1127 "awkgram.y" { in_print = FALSE; @@ -3236,14 +3236,14 @@ regular_loop: case 80: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1132 "awkgram.y" { in_print = FALSE; in_parens = 0; } break; case 81: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1133 "awkgram.y" { if ((yyvsp[(1) - (3)])->redir_type == redirect_twoway @@ -3256,7 +3256,7 @@ regular_loop: case 82: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1144 "awkgram.y" { (yyval) = mk_condition((yyvsp[(3) - (6)]), (yyvsp[(1) - (6)]), (yyvsp[(6) - (6)]), NULL, NULL); @@ -3265,7 +3265,7 @@ regular_loop: case 83: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1149 "awkgram.y" { (yyval) = mk_condition((yyvsp[(3) - 
(9)]), (yyvsp[(1) - (9)]), (yyvsp[(6) - (9)]), (yyvsp[(7) - (9)]), (yyvsp[(9) - (9)])); @@ -3274,14 +3274,14 @@ regular_loop: case 88: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1166 "awkgram.y" { (yyval) = NULL; } break; case 89: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1168 "awkgram.y" { bcfree((yyvsp[(1) - (2)])); @@ -3291,7 +3291,7 @@ regular_loop: case 92: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1181 "awkgram.y" { append_param((yyvsp[(1) - (1)])->lextok); @@ -3302,7 +3302,7 @@ regular_loop: case 93: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1187 "awkgram.y" { append_param((yyvsp[(3) - (3)])->lextok); @@ -3314,63 +3314,63 @@ regular_loop: case 94: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1194 "awkgram.y" { /* func_params = NULL; */ } break; case 95: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1196 "awkgram.y" { /* func_params = NULL; */ } break; case 96: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1198 "awkgram.y" { /* func_params = NULL; */ } break; case 97: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1204 "awkgram.y" { (yyval) = NULL; } break; case 98: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1206 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 99: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1211 "awkgram.y" { (yyval) = NULL; } break; case 100: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1213 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 101: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1218 "awkgram.y" { (yyval) = mk_expression_list(NULL, (yyvsp[(1) - (1)])); } break; case 102: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1220 "awkgram.y" { (yyval) = mk_expression_list((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); @@ -3380,35 +3380,35 @@ regular_loop: case 103: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 
1225 "awkgram.y" { (yyval) = NULL; } break; case 104: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1227 "awkgram.y" { (yyval) = NULL; } break; case 105: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1229 "awkgram.y" { (yyval) = NULL; } break; case 106: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1231 "awkgram.y" { (yyval) = NULL; } break; case 107: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1237 "awkgram.y" { if (do_lint && (yyvsp[(3) - (3)])->lasti->opcode == Op_match_rec) @@ -3420,21 +3420,21 @@ regular_loop: case 108: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1244 "awkgram.y" { (yyval) = mk_boolean((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 109: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1246 "awkgram.y" { (yyval) = mk_boolean((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 110: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1248 "awkgram.y" { if ((yyvsp[(1) - (3)])->lasti->opcode == Op_match_rec) @@ -3455,7 +3455,7 @@ regular_loop: case 111: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1264 "awkgram.y" { if (do_lint_old) @@ -3470,7 +3470,7 @@ regular_loop: case 112: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1274 "awkgram.y" { if (do_lint && (yyvsp[(3) - (3)])->lasti->opcode == Op_match_rec) @@ -3482,35 +3482,35 @@ regular_loop: case 113: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1281 "awkgram.y" { (yyval) = mk_condition((yyvsp[(1) - (5)]), (yyvsp[(2) - (5)]), (yyvsp[(3) - (5)]), (yyvsp[(4) - (5)]), (yyvsp[(5) - (5)])); } break; case 114: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1283 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 115: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1288 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 116: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ 
#line 1290 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 117: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1292 "awkgram.y" { (yyvsp[(2) - (2)])->opcode = Op_assign_quotient; @@ -3520,49 +3520,49 @@ regular_loop: case 118: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1300 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 119: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1302 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 120: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1307 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 121: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1309 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 122: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1314 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 123: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1316 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 124: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1318 "awkgram.y" { int count = 2; @@ -3617,49 +3617,49 @@ regular_loop: case 126: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1373 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 127: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1375 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 128: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1377 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 129: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1379 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 130: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1381 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), 
(yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 131: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1383 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 132: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1385 "awkgram.y" { /* @@ -3687,7 +3687,7 @@ regular_loop: case 133: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1408 "awkgram.y" { (yyvsp[(2) - (2)])->opcode = Op_postincrement; @@ -3697,7 +3697,7 @@ regular_loop: case 134: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1413 "awkgram.y" { (yyvsp[(2) - (2)])->opcode = Op_postdecrement; @@ -3707,7 +3707,7 @@ regular_loop: case 135: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1418 "awkgram.y" { if (do_lint_old) { @@ -3732,7 +3732,7 @@ regular_loop: case 136: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1443 "awkgram.y" { (yyval) = mk_getline((yyvsp[(3) - (4)]), (yyvsp[(4) - (4)]), (yyvsp[(1) - (4)]), (yyvsp[(2) - (4)])->redir_type); @@ -3742,49 +3742,49 @@ regular_loop: case 137: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1449 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 138: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1451 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 139: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1453 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 140: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1455 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 141: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1457 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 142: -/* Line 
1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1459 "awkgram.y" { (yyval) = mk_binary((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)]), (yyvsp[(2) - (3)])); } break; case 143: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1464 "awkgram.y" { (yyval) = list_create((yyvsp[(1) - (1)])); @@ -3793,7 +3793,7 @@ regular_loop: case 144: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1468 "awkgram.y" { if ((yyvsp[(2) - (2)])->opcode == Op_match_rec) { @@ -3829,14 +3829,14 @@ regular_loop: case 145: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1499 "awkgram.y" { (yyval) = (yyvsp[(2) - (3)]); } break; case 146: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1501 "awkgram.y" { (yyval) = snode((yyvsp[(3) - (4)]), (yyvsp[(1) - (4)])); @@ -3847,7 +3847,7 @@ regular_loop: case 147: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1507 "awkgram.y" { (yyval) = snode((yyvsp[(3) - (4)]), (yyvsp[(1) - (4)])); @@ -3858,7 +3858,7 @@ regular_loop: case 148: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1513 "awkgram.y" { static short warned1 = FALSE; @@ -3876,7 +3876,7 @@ regular_loop: case 151: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1528 "awkgram.y" { (yyvsp[(1) - (2)])->opcode = Op_preincrement; @@ -3886,7 +3886,7 @@ regular_loop: case 152: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1533 "awkgram.y" { (yyvsp[(1) - (2)])->opcode = Op_predecrement; @@ -3896,7 +3896,7 @@ regular_loop: case 153: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1538 "awkgram.y" { (yyval) = list_create((yyvsp[(1) - (1)])); @@ -3905,7 +3905,7 @@ regular_loop: case 154: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1542 "awkgram.y" { (yyval) = list_create((yyvsp[(1) - (1)])); @@ -3914,7 +3914,7 @@ regular_loop: case 155: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1546 "awkgram.y" { if ((yyvsp[(2) - (2)])->lasti->opcode == Op_push_i @@ -3931,7 +3931,7 @@ 
regular_loop: case 156: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1558 "awkgram.y" { /* @@ -3946,7 +3946,7 @@ regular_loop: case 157: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1571 "awkgram.y" { func_use((yyvsp[(1) - (1)])->lasti->func_name, FUNC_USE); @@ -3956,7 +3956,7 @@ regular_loop: case 158: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1576 "awkgram.y" { /* indirect function call */ @@ -3994,7 +3994,7 @@ regular_loop: case 159: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1612 "awkgram.y" { param_sanity((yyvsp[(3) - (4)])); @@ -4013,42 +4013,42 @@ regular_loop: case 160: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1629 "awkgram.y" { (yyval) = NULL; } break; case 161: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1631 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 162: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1636 "awkgram.y" { (yyval) = NULL; } break; case 163: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1638 "awkgram.y" { (yyval) = (yyvsp[(1) - (2)]); } break; case 164: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1643 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 165: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1645 "awkgram.y" { (yyval) = list_merge((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); @@ -4057,7 +4057,7 @@ regular_loop: case 166: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1652 "awkgram.y" { INSTRUCTION *ip = (yyvsp[(1) - (1)])->lasti; @@ -4076,7 +4076,7 @@ regular_loop: case 167: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1669 "awkgram.y" { INSTRUCTION *t = (yyvsp[(2) - (3)]); @@ -4095,14 +4095,14 @@ regular_loop: case 168: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1686 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); } break; case 169: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1688 "awkgram.y" { (yyval) = 
list_merge((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); @@ -4111,14 +4111,14 @@ regular_loop: case 170: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1695 "awkgram.y" { (yyval) = (yyvsp[(1) - (2)]); } break; case 171: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1700 "awkgram.y" { char *var_name = (yyvsp[(1) - (1)])->lextok; @@ -4131,7 +4131,7 @@ regular_loop: case 172: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1708 "awkgram.y" { NODE *n; @@ -4147,7 +4147,7 @@ regular_loop: case 173: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1722 "awkgram.y" { INSTRUCTION *ip = (yyvsp[(1) - (1)])->nexti; @@ -4165,7 +4165,7 @@ regular_loop: case 174: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1735 "awkgram.y" { (yyval) = list_append((yyvsp[(2) - (3)]), (yyvsp[(1) - (3)])); @@ -4176,7 +4176,7 @@ regular_loop: case 175: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1744 "awkgram.y" { (yyvsp[(1) - (1)])->opcode = Op_postincrement; @@ -4185,7 +4185,7 @@ regular_loop: case 176: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1748 "awkgram.y" { (yyvsp[(1) - (1)])->opcode = Op_postdecrement; @@ -4194,49 +4194,49 @@ regular_loop: case 177: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1751 "awkgram.y" { (yyval) = NULL; } break; case 179: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1759 "awkgram.y" { yyerrok; } break; case 180: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1763 "awkgram.y" { yyerrok; } break; case 183: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1772 "awkgram.y" { yyerrok; } break; case 184: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1776 "awkgram.y" { (yyval) = (yyvsp[(1) - (1)]); yyerrok; } break; case 185: -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 1780 "awkgram.y" { yyerrok; } break; -/* Line 1806 of yacc.c */ +/* Line 1821 of yacc.c */ #line 4253 "awkgram.c" default: break; } 
@@ -4485,6 +4485,7 @@ struct token { # define RESX 0x0800 /* Bell Labs Research extension */ # define BREAK 0x1000 /* break allowed inside */ # define CONTINUE 0x2000 /* continue allowed inside */ + NODE *(*ptr)(int); /* function that implements this keyword */ }; @@ -4542,9 +4543,9 @@ static const struct token tokentab[] = { {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0}, {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, {"function",Op_func, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, +{"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0}, {"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"if", Op_K_if, LEX_IF, 0, 0}, {"in", Op_symbol, LEX_IN, 0, 0}, {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0}, @@ -4575,7 +4576,7 @@ static const struct token tokentab[] = { #endif {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime}, {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0}, {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, @@ -6286,8 +6287,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) assert(nexp > 0); } - r->builtin = tokentab[idx].ptr; - /* check against how many args. 
are allowed for this builtin */ args_allowed = tokentab[idx].flags & ARGS; if (args_allowed && (args_allowed & A(nexp)) == 0) { @@ -6296,7 +6295,85 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) return NULL; } + /* special processing for sub, gsub and gensub */ + + if (tokentab[idx].value == Op_sub_builtin) { + const char *operator = tokentab[idx].operator; + + r->sub_flags = 0; + + arg = subn->nexti; /* first arg list */ + (void) mk_rexp(arg); + + if (strcmp(operator, "gensub") != 0) { + /* sub and gsub */ + + if (strcmp(operator, "gsub") == 0) + r->sub_flags |= GSUB; + + arg = arg->lasti->nexti; /* 2nd arg list */ + if (nexp == 2) { + INSTRUCTION *expr; + expr = list_create(instruction(Op_push_i)); + expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(expr, instruction(Op_field_spec))); + } + + arg = arg->lasti->nexti; /* third arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push_i) { + if (do_lint) + lintwarn(_("%s: string literal as last arg of substitute has no effect"), + operator); + r->sub_flags |= LITERAL; + } else { + if (make_assignable(ip) == NULL) + yyerror(_("%s third parameter is not a changeable object"), + operator); + else + ip->do_reference = TRUE; + } + + r->expr_count = count_expressions(&subn, FALSE); + ip = subn->lasti; + + (void) list_append(subn, r); + + /* add after_assign code */ + if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { + (void) list_append(subn, instruction(Op_var_assign)); + subn->lasti->memory = ip->memory; + subn->lasti->assign_var = ip->memory->var_assign; + r->sub_flags |= AFTER_ASSIGN; + } else if (ip->opcode == Op_field_spec_lhs) { + (void) list_append(subn, instruction(Op_field_assign)); + subn->lasti->field_assign = (Func_ptr) 0; + ip->target_assign = subn->lasti; + r->sub_flags |= AFTER_ASSIGN; + } + return subn; + + } else { + /* gensub */ + + r->sub_flags |= GENSUB; + if (nexp == 3) { + ip =
instruction(Op_push_i); + ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(list_create(ip), instruction(Op_field_spec))); + } + + r->expr_count = count_expressions(&subn, FALSE); + return list_append(subn, r); + } + } + + r->builtin = tokentab[idx].ptr; + /* special case processing for a few builtins */ + if (r->builtin == do_length) { if (nexp == 0) { /* no args. Use $0 */ @@ -6338,71 +6415,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) if (/*ip == arg->nexti && */ ip->opcode == Op_push) ip->opcode = Op_push_array; } - } else if (r->builtin == do_sub || r->builtin == do_gsub) { - int literal = FALSE; - - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); - - arg = arg->lasti->nexti; /* 2nd arg list */ - if (nexp == 2) { - INSTRUCTION *expr; - expr = list_create(instruction(Op_push_i)); - expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(expr, instruction(Op_field_spec))); - } - - arg = arg->lasti->nexti; /* third arg list */ - ip = arg->lasti; - if (ip->opcode == Op_push_i) { - if (do_lint) - lintwarn(_("%s: string literal as last arg of substitute has no effect"), - (r->builtin == do_sub) ? "sub" : "gsub"); - literal = TRUE; - } else { - if (make_assignable(ip) == NULL) - yyerror(_("%s third parameter is not a changeable object"), - (r->builtin == do_sub) ? "sub" : "gsub"); - else - ip->do_reference = TRUE; - } - - /* kludge: This is one of the few cases - * when we need to know the type of item on stack. - * In case of string literal as the last argument, - * pass 4 as # of args (See sub_common code in builtin.c). - * Other cases like length(array or scalar) seem - * to work out ok. 
- */ - - r->expr_count = count_expressions(&subn, FALSE) + !!literal; - ip = subn->lasti; - - (void) list_append(subn, r); - - /* add after_assign bytecode(s) */ - if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { - (void) list_append(subn, instruction(Op_var_assign)); - subn->lasti->memory = ip->memory; - subn->lasti->assign_var = ip->memory->var_assign; - } else if (ip->opcode == Op_field_spec_lhs) { - (void) list_append(subn, instruction(Op_field_assign)); - subn->lasti->field_assign = (Func_ptr) 0; - ip->target_assign = subn->lasti; - } - return subn; - } else if (r->builtin == do_gensub) { - if (nexp == 3) { - arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ - ip = instruction(Op_push_i); - ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(list_create(ip), - instruction(Op_field_spec))); - } - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); } else if (r->builtin == do_split) { arg = subn->nexti->lasti->nexti; /* 2nd arg list */ ip = arg->lasti; diff --git a/awkgram.y b/awkgram.y index 6b28b52..4553d09 100644 --- a/awkgram.y +++ b/awkgram.y @@ -1795,6 +1795,7 @@ struct token { # define RESX 0x0800 /* Bell Labs Research extension */ # define BREAK 0x1000 /* break allowed inside */ # define CONTINUE 0x2000 /* continue allowed inside */ + NODE *(*ptr)(int); /* function that implements this keyword */ }; @@ -1852,9 +1853,9 @@ static const struct token tokentab[] = { {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0}, {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, {"function",Op_func, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, +{"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0}, {"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"if", Op_K_if, 
LEX_IF, 0, 0}, {"in", Op_symbol, LEX_IN, 0, 0}, {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0}, @@ -1885,7 +1886,7 @@ static const struct token tokentab[] = { #endif {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime}, {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0}, {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, @@ -3596,8 +3597,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) assert(nexp > 0); } - r->builtin = tokentab[idx].ptr; - /* check against how many args. are allowed for this builtin */ args_allowed = tokentab[idx].flags & ARGS; if (args_allowed && (args_allowed & A(nexp)) == 0) { @@ -3606,7 +3605,85 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) return NULL; } + /* special processing for sub, gsub and gensub */ + + if (tokentab[idx].value == Op_sub_builtin) { + const char *operator = tokentab[idx].operator; + + r->sub_flags = 0; + + arg = subn->nexti; /* first arg list */ + (void) mk_rexp(arg); + + if (strcmp(operator, "gensub") != 0) { + /* sub and gsub */ + + if (strcmp(operator, "gsub") == 0) + r->sub_flags |= GSUB; + + arg = arg->lasti->nexti; /* 2nd arg list */ + if (nexp == 2) { + INSTRUCTION *expr; + expr = list_create(instruction(Op_push_i)); + expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(expr, instruction(Op_field_spec))); + } + + arg = arg->lasti->nexti; /* third arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push_i) { + if (do_lint) + lintwarn(_("%s: string literal as last arg of substitute has no effect"), + operator); + r->sub_flags |= LITERAL; + } else { + if (make_assignable(ip) == NULL) + yyerror(_("%s third parameter is not a changeable object"), + operator); + else +
ip->do_reference = TRUE; + } + + r->expr_count = count_expressions(&subn, FALSE); + ip = subn->lasti; + + (void) list_append(subn, r); + + /* add after_assign code */ + if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { + (void) list_append(subn, instruction(Op_var_assign)); + subn->lasti->memory = ip->memory; + subn->lasti->assign_var = ip->memory->var_assign; + r->sub_flags |= AFTER_ASSIGN; + } else if (ip->opcode == Op_field_spec_lhs) { + (void) list_append(subn, instruction(Op_field_assign)); + subn->lasti->field_assign = (Func_ptr) 0; + ip->target_assign = subn->lasti; + r->sub_flags |= AFTER_ASSIGN; + } + return subn; + + } else { + /* gensub */ + + r->sub_flags |= GENSUB; + if (nexp == 3) { + ip = instruction(Op_push_i); + ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(list_create(ip), instruction(Op_field_spec))); + } + + r->expr_count = count_expressions(&subn, FALSE); + return list_append(subn, r); + } + } + + r->builtin = tokentab[idx].ptr; + /* special case processing for a few builtins */ + if (r->builtin == do_length) { if (nexp == 0) { /* no args. 
Use $0 */ @@ -3648,71 +3725,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) if (/*ip == arg->nexti && */ ip->opcode == Op_push) ip->opcode = Op_push_array; } - } else if (r->builtin == do_sub || r->builtin == do_gsub) { - int literal = FALSE; - - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); - - arg = arg->lasti->nexti; /* 2nd arg list */ - if (nexp == 2) { - INSTRUCTION *expr; - expr = list_create(instruction(Op_push_i)); - expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(expr, instruction(Op_field_spec))); - } - - arg = arg->lasti->nexti; /* third arg list */ - ip = arg->lasti; - if (ip->opcode == Op_push_i) { - if (do_lint) - lintwarn(_("%s: string literal as last arg of substitute has no effect"), - (r->builtin == do_sub) ? "sub" : "gsub"); - literal = TRUE; - } else { - if (make_assignable(ip) == NULL) - yyerror(_("%s third parameter is not a changeable object"), - (r->builtin == do_sub) ? "sub" : "gsub"); - else - ip->do_reference = TRUE; - } - - /* kludge: This is one of the few cases - * when we need to know the type of item on stack. - * In case of string literal as the last argument, - * pass 4 as # of args (See sub_common code in builtin.c). - * Other cases like length(array or scalar) seem - * to work out ok. 
- */ - - r->expr_count = count_expressions(&subn, FALSE) + !!literal; - ip = subn->lasti; - - (void) list_append(subn, r); - - /* add after_assign bytecode(s) */ - if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { - (void) list_append(subn, instruction(Op_var_assign)); - subn->lasti->memory = ip->memory; - subn->lasti->assign_var = ip->memory->var_assign; - } else if (ip->opcode == Op_field_spec_lhs) { - (void) list_append(subn, instruction(Op_field_assign)); - subn->lasti->field_assign = (Func_ptr) 0; - ip->target_assign = subn->lasti; - } - return subn; - } else if (r->builtin == do_gensub) { - if (nexp == 3) { - arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ - ip = instruction(Op_push_i); - ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(list_create(ip), - instruction(Op_field_spec))); - } - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); } else if (r->builtin == do_split) { arg = subn->nexti->lasti->nexti; /* 2nd arg list */ ip = arg->lasti; diff --git a/builtin.c b/builtin.c index 724ea6d..d554930 100644 --- a/builtin.c +++ b/builtin.c @@ -72,7 +72,6 @@ extern NODE **fields_arr; extern int output_is_tty; extern FILE *output_fp; -static NODE *sub_common(int nargs, long how_many, int backdigs); #define POP_TWO_SCALARS(s1, s2) \ s2 = POP_SCALAR(); \ @@ -2319,7 +2318,7 @@ do_match(int nargs) return make_number((AWKNUM) rstart); } -/* sub_common --- the common code (does the work) for sub, gsub, and gensub */ +/* do_sub --- do the work for sub, gsub, and gensub */ /* * Gsub can be tricksy; particularly when handling the case of null strings. @@ -2412,12 +2411,12 @@ do_match(int nargs) * NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>.
*/ -static NODE * -sub_common(int nargs, long how_many, int backdigs) +NODE * +do_sub(int nargs, unsigned int flags, int *num_matches) { char *scan; char *bp, *cp; - char *buf; + char *buf = NULL; size_t buflen; char *matchend; size_t len; @@ -2434,38 +2433,77 @@ sub_common(int nargs, long how_many, int backdigs) NODE *s; /* subst. pattern */ NODE *t; /* string to make sub. in; $0 if none given */ NODE *tmp; - NODE **lhs; - int global = (how_many == -1); + NODE **lhs = NULL; + long how_many = 1; /* one substitution for sub, also gensub default */ + int global; long current; int lastmatchnonzero; char *mb_indices = NULL; - - tmp = PEEK(2); /* take care of regexp early, in case re_update is fatal */ - rp = re_update(tmp); - /* original string */ - if (nargs == 4) { /* kludge: no of items on stack is really 3, - * See snode(..) in awkgram.y - */ - lhs = NULL; - t = POP_STRING(); + if ((flags & GENSUB) != 0) { + double d; + NODE *t1; + + tmp = PEEK(3); + rp = re_update(tmp); + + t = POP_STRING(); /* original string */ + + t1 = POP_SCALAR(); /* value of global flag */ + if ((t1->flags & (STRCUR|STRING)) != 0) { + if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G')) + how_many = -1; + else { + d = force_number(t1); + + if ((t1->flags & NUMCUR) != 0) + goto set_how_many; + + how_many = 1; + } + } else { + d = force_number(t1); +set_how_many: + if (d < 1) + how_many = 1; + else if (d < LONG_MAX) + how_many = d; + else + how_many = LONG_MAX; + if (d == 0) + warning(_("gensub: third argument of 0 treated as 1")); + } + DEREF(t1); + } else { - lhs = POP_ADDRESS(); - t = force_string(*lhs); + + /* take care of regexp early, in case re_update is fatal */ + + tmp = PEEK(2); + rp = re_update(tmp); + + if ((flags & GSUB) != 0) + how_many = -1; + + /* original string */ + + if ((flags & LITERAL) != 0) + t = POP_STRING(); + else { + lhs = POP_ADDRESS(); + t = force_string(*lhs); + } } + global = (how_many == -1); - s = POP_STRING(); /* replacement text */ + s = 
POP_STRING(); /* replacement text */ decr_sp(); /* regexp, already updated above */ /* do the search early to avoid work on non-match */ if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 || - RESTART(rp, t->stptr) > t->stlen) { - if (lhs == NULL) - DEREF(t); - DEREF(s); - return make_number((AWKNUM) 0.0); - } + RESTART(rp, t->stptr) > t->stlen) + goto done; t->flags |= STRING; @@ -2476,7 +2514,7 @@ sub_common(int nargs, long how_many, int backdigs) repl = s->stptr; replend = repl + s->stlen; repllen = replend - repl; - emalloc(buf, char *, buflen + 2, "sub_common"); + emalloc(buf, char *, buflen + 2, "do_sub"); buf[buflen] = '\0'; buf[buflen + 1] = '\0'; ampersands = 0; @@ -2490,7 +2528,7 @@ sub_common(int nargs, long how_many, int backdigs) * for example. */ if (gawk_mb_cur_max > 1 && repllen > 0) { - emalloc(mb_indices, char *, repllen * sizeof(char), "sub_common"); + emalloc(mb_indices, char *, repllen * sizeof(char), "do_sub"); index_multibyte_buffer(repl, mb_indices, repllen); } @@ -2500,7 +2538,7 @@ sub_common(int nargs, long how_many, int backdigs) repllen--; ampersands++; } else if (*scan == '\\') { - if (backdigs) { /* gensub, behave sanely */ + if (flags & GENSUB) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { ampersands++; scan++; @@ -2575,7 +2613,7 @@ sub_common(int nargs, long how_many, int backdigs) && (gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1)) ) { - if (backdigs) { /* gensub, behave sanely */ + if (flags & GENSUB) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { int dig = scan[1] - '0'; if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { @@ -2619,7 +2657,7 @@ sub_common(int nargs, long how_many, int backdigs) textlen = text + textlen - matchend; text = matchend; - if ((current >= how_many && !global) + if ((current >= how_many && ! 
global) || ((long) textlen <= 0 && matchstart == matchend) || research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1) break; @@ -2628,7 +2666,7 @@ sub_common(int nargs, long how_many, int backdigs) sofar = bp - buf; if (buflen - sofar - textlen - 1) { buflen = sofar + textlen + 2; - erealloc(buf, char *, buflen, "sub_common"); + erealloc(buf, char *, buflen, "do_sub"); bp = buf + sofar; } for (scan = matchend; scan < text + textlen; scan++) @@ -2636,102 +2674,39 @@ sub_common(int nargs, long how_many, int backdigs) *bp = '\0'; textlen = bp - buf; - DEREF(s); - - if (lhs != NULL) { - if (matches > 0) { - unref(*lhs); - *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); - } else - efree(buf); - } else { - efree(buf); - DEREF(t); - } - if (mb_indices != NULL) efree(mb_indices); - return make_number((AWKNUM) matches); -} - -/* do_gsub --- global substitution */ - -NODE * -do_gsub(int nargs) -{ - return sub_common(nargs, -1, FALSE); -} - -/* do_sub --- single substitution */ - -NODE * -do_sub(int nargs) -{ - return sub_common(nargs, 1, FALSE); -} - -/* do_gensub --- fix up the tree for sub_common for the gensub function */ - -NODE * -do_gensub(int nargs) -{ - NODE *t, *tmp, *target, *ret; - long how_many = 1; /* default is one substitution */ - double d; - - tmp = POP_STRING(); /* target */ - t = POP_SCALAR(); /* value of global flag */ - - /* - * We make copy of the original target string, and pass that - * in to sub_common() as the target to make the substitution in. - * We will then return the result string as the return value of - * this function. 
- */ - - target = make_string(tmp->stptr, tmp->stlen); - DEREF(tmp); - PUSH_ADDRESS(& target); - - if ((t->flags & (STRCUR|STRING)) != 0) { - if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G')) - how_many = -1; - else { - d = force_number(t); +done: + DEREF(s); - if ((t->flags & NUMCUR) != 0) - goto set_how_many; + *num_matches = matches; + if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) + efree(buf); - how_many = 1; + if (flags & GENSUB) { + if (matches > 0) { + /* return the result string */ + DEREF(t); + return make_str_node(buf, textlen, ALREADY_MALLOCED); } - } else { - d = force_number(t); -set_how_many: - if (d < 1) - how_many = 1; - else if (d < LONG_MAX) - how_many = d; - else - how_many = LONG_MAX; - if (d == 0) - warning(_("gensub: third argument of 0 treated as 1")); - } - - DEREF(t); - ret = sub_common(3, how_many, TRUE); - unref(ret); + /* return the original string */ + return t; + } - /* - * Note that we don't care what sub_common() returns, since the - * easiest thing for the programmer is to return the string, even - * if no substitutions were done. - */ + /* For a string literal, must not change the original string. */ + if (flags & LITERAL) + DEREF(t); + else if (matches > 0) { + unref(*lhs); + *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); + } - return target; + return make_number((AWKNUM) matches); } + /* make_integer - Convert an integer to a number node. */ static NODE * diff --git a/debug.c b/debug.c index 9b9db34..404042c 100644 --- a/debug.c +++ b/debug.c @@ -3740,7 +3740,16 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump) break; case Op_var_assign: - print_func(fp, "[set_%s]\n", pc->memory->vname); + if (pc->assign_var) + print_func(fp, "[set_%s()]", pc->memory->vname); + print_func(fp, "\n"); + break; + + case Op_field_assign: + if (pc->field_assign) + print_func(fp, "[%s]", pc->field_assign == reset_record ? 
+ "reset_record()" : "invalidate_field0()"); + print_func(fp, "\n"); break; case Op_field_spec_lhs: @@ -3830,6 +3839,27 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump) pc->line_range, pc->target_jmp); break; + case Op_sub_builtin: + { + const char *fname = "sub"; + static const struct flagtab values[] = { + { GSUB, "GSUB" }, + { GENSUB, "GENSUB" }, + { AFTER_ASSIGN, "AFTER_ASSIGN" }, + { LITERAL, "LITERAL" }, + { 0, NULL } + }; + + if (pc->sub_flags & GSUB) + fname = "gsub"; + else if (pc->sub_flags & GENSUB) + fname = "gensub"; + print_func(fp, "%s [arg_count = %ld] [sub_flags = %s]\n", + fname, pc->expr_count, + genflags2str(pc->sub_flags, values)); + } + break; + case Op_builtin: { const char *fname = getfname(pc->builtin); diff --git a/eval.c b/eval.c index 4132474..bdbd04b 100644 --- a/eval.c +++ b/eval.c @@ -348,6 +348,7 @@ static struct optypetab { { "Op_K_getline", "getline" }, { "Op_K_nextfile", "nextfile" }, { "Op_builtin", NULL }, + { "Op_sub_builtin", NULL }, { "Op_in_array", " in " }, { "Op_func_call", NULL }, { "Op_indirect_func_call", NULL }, @@ -2114,11 +2115,13 @@ post: break; case Op_var_assign: - pc->assign_var(); + if (pc->assign_var) + pc->assign_var(); break; case Op_field_assign: - pc->field_assign(); + if (pc->field_assign) + pc->field_assign(); break; case Op_concat: @@ -2256,7 +2259,34 @@ arrayfor: #endif PUSH(r); break; - + + case Op_sub_builtin: + { + /* sub, gsub and gensub */ + + int matches = 0; + + r = do_sub(pc->expr_count, pc->sub_flags, & matches); + PUSH(r); + + if (matches == 0 && (pc->sub_flags & AFTER_ASSIGN) != 0) { + + /* For sub and gsub, must not execute after_assign code; + * If the target is a FIELD, this means no field re-splitting or + * $0 reconstruction. For a special variable as target, + * set_XX routine is not called. 
+ */ + + ni = pc->nexti; + assert(ni->opcode == Op_field_assign || ni->opcode == Op_var_assign); + if (ni->opcode == Op_field_assign) + ni->field_assign = (Func_ptr) 0; + else + ni->assign_var = (Func_ptr) 0; + } + } + break; + case Op_K_print: do_print(pc->expr_count, pc->redir_type); break; diff --git a/profile.c b/profile.c index cba8be9..01d1e42 100644 --- a/profile.c +++ b/profile.c @@ -507,6 +507,20 @@ cleanup: case Op_after_endfile: break; + case Op_sub_builtin: + { + const char *fname = "sub"; + if (pc->sub_flags & GSUB) + fname = "gsub"; + else if (pc->sub_flags & GENSUB) + fname = "gensub"; + tmp = pp_list(pc->expr_count, "()", ", "); + str = pp_concat(fname, tmp, ""); + efree(tmp); + pp_push(Op_sub_builtin, str, CAN_FREE); + } + break; + case Op_builtin: { static char *ext_func = "extension_function()";