This file is indexed.

/usr/lib/ruby/vendor_ruby/ruby_lexer.rb is in ruby-parser 3.6.2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
# encoding: UTF-8

class RubyLexer

  # :stopdoc:
  # True when the running interpreter's strings respond to #encoding.
  RUBY19 = "".respond_to? :encoding

  # Matches one identifier character: a word character or a non-ASCII
  # character (expressed as code points when RUBY19, as raw high bytes
  # otherwise).
  IDENT_CHAR = if RUBY19 then
                 /[\w\u0080-\u{10ffff}]/u
               else
                 /[\w\x80-\xFF]/n
               end

  # Sentinel returned by tokadd_string at end of input and by
  # process_underscore when __END__ is found.
  EOF = :eof_haha!

  # ruby constants for strings (should this be moved somewhere else?)
  #
  # The STR_FUNC_* values are bit flags; tokadd_string, heredoc and
  # parse_string test them with "&" to decide how a string body is lexed.

  STR_FUNC_BORING = 0x00
  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
  STR_FUNC_EXPAND = 0x02
  STR_FUNC_REGEXP = 0x04
  STR_FUNC_QWORDS = 0x08
  STR_FUNC_SYMBOL = 0x10
  STR_FUNC_INDENT = 0x20 # <<-HEREDOC

  # Flag combinations for each kind of string-like literal.
  STR_SQUOTE = STR_FUNC_BORING
  STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
  STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
  STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
  STR_SSYM   = STR_FUNC_SYMBOL
  STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND

  # Escape bodies (the text after the backslash) that map directly to a
  # replacement string; consulted first by unescape.
  ESCAPES = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }

  # Operator text to token symbol. Not referenced in this file --
  # presumably used by the generated lexer required at the bottom
  # (TODO confirm).
  TOKENS = {
    "!"   => :tBANG,
    "!="  => :tNEQ,
    # "!@"  => :tUBANG,
    "!~"  => :tNMATCH,
    ","   => :tCOMMA,
    ".."  => :tDOT2,
    "..." => :tDOT3,
    "="   => :tEQL,
    "=="  => :tEQ,
    "===" => :tEQQ,
    "=>"  => :tASSOC,
    "=~"  => :tMATCH,
    "->"  => :tLAMBDA,
  }

  # Memoizes a Regexp that matches each delimiter string literally. The
  # nil key is pre-seeded with nil so "no delimiter" yields no regexp
  # instead of calling Regexp.escape(nil).
  @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
  @@regexp_cache[nil] = nil

  # :startdoc:

  # Count of currently open "{"; bumped in process_curly_brace and
  # decremented in process_bracing.
  attr_accessor :brace_nest
  # StackState created in reset; pushed/popped alongside cond.
  attr_accessor :cmdarg
  # Set to true by reset and by several process_* methods (e.g. after a
  # newline token).
  attr_accessor :command_start
  # Read by is_label_possible? and process_token; assigned outside this file.
  attr_accessor :command_state
  # Read by process_token; assigned outside this file.
  attr_accessor :last_state
  # StackState created in reset; process_token_keyword consults
  # cond.is_in_state to pick kDO_COND.
  attr_accessor :cond
  # Reset to 0 in reset; decremented by unescape for "\n" escapes.
  attr_accessor :extra_lineno

  ##
  # Additional context surrounding tokens that both the lexer and
  # grammar use.

  attr_accessor :lex_state

  # Description of the string literal currently being lexed:
  # [:strterm, type, beg, nnd] (see #string) or
  # [:heredoc, eos, func, line] (see #heredoc_identifier); nil otherwise.
  attr_accessor :lex_strterm
  # Compared with paren_nest to recognise the start of a lambda body.
  attr_accessor :lpar_beg
  # Count of currently open "(" and "[".
  attr_accessor :paren_nest
  attr_accessor :parser # HACK for very end of lexer... *sigh*
  # Flag consulted when deciding between unary/prefix and binary readings
  # of an operator.
  attr_accessor :space_seen
  # Array of string fragments accumulated for the current string token.
  attr_accessor :string_buffer
  # Nesting depth of paired delimiters inside the current string
  # (maintained by tokadd_string).
  attr_accessor :string_nest

  # Last token read via next_token.
  attr_accessor :token

  ##
  # What version of ruby to parse. 18 and 19 are the only valid values
  # currently supported.

  attr_accessor :version

  # Only the writer is generated; the reader is the custom #comments below.
  attr_writer :comments

  ##
  # Creates a lexer for ruby version +v+ (18 unless given) and puts it
  # into its initial state via #reset.

  def initialize(v = 18)
    self.version = v
    reset
  end

  ##
  # Issues the warning used when an operator could be read either as a
  # binary operator or as the start of a first argument.

  def arg_ambiguous
    warning "Ambiguous first argument. make sure."
  end

  ##
  # Lex state to enter after an operator: :expr_arg when the operator
  # appeared in a method-name position (see #in_arg_state?), :expr_beg
  # otherwise.

  def arg_state
    if in_arg_state? then
      :expr_arg
    else
      :expr_beg
    end
  end

  ##
  # Delegates to the scanner's bol?, i.e. asks whether the scan position
  # is at the start of a line.

  def beginning_of_line?
    ss.bol?
  end
  alias :bol? :beginning_of_line? # to make .rex file more readable

  ##
  # Delegates to the scanner's check: tests +re+ at the current position
  # and returns what the scanner returns.

  def check(re)
    ss.check(re)
  end

  ##
  # Returns all comment text collected so far as one string and empties
  # the collection.

  def comments # TODO: remove this... maybe comment_string + attr_accessor
    @comments.join.tap { @comments.clear }
  end

  ##
  # Delegates to the scanner's eos?, i.e. asks whether all input has been
  # consumed.

  def end_of_stream?
    ss.eos?
  end

  ##
  # True when the current lex state is :expr_dot.

  def expr_dot?
    in_lex_state?(:expr_dot)
  end

  ##
  # True when the current lex state is :expr_fname.

  def expr_fname?
    in_lex_state?(:expr_fname)
  end

  ##
  # Pushes +false+ onto both the cond and cmdarg stacks, then returns the
  # [token, text] pair while switching to :expr_beg (see #result).

  def expr_result(token, text)
    [cond, cmdarg].each { |stack| stack.push false }
    result(:expr_beg, token, text)
  end

  ##
  # Lexes the next piece of a heredoc body.
  #
  # +here+ is the lex_strterm array built by #heredoc_identifier:
  # [:heredoc, eos, func, last_line]. Returns a [token_type, value] pair:
  # :tSTRING_END with the terminator once the end marker is reached,
  # :tSTRING_DVAR / :tSTRING_DBEG at an interpolation start, or
  # :tSTRING_CONTENT with the accumulated text (carriage returns removed).
  # Calls rb_compile_error if input ends before the terminator is found.

  def heredoc here # TODO: rewrite / remove
    _, eos, func, last_line = here

    # <<-EOS allows the terminator to be indented by spaces/tabs.
    indent  = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
    expand  = (func & STR_FUNC_EXPAND) != 0
    eos_re  = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
    err_msg = "can't match #{eos_re.inspect} anywhere in "

    rb_compile_error err_msg if end_of_stream?

    if beginning_of_line? && scan(eos_re) then
      self.lineno += 1
      # Put back the remainder of the line the heredoc started on so it
      # is lexed after the heredoc body.
      ss.unread_many last_line # TODO: figure out how to remove this
      return :tSTRING_END, eos
    end

    self.string_buffer = []

    if expand then
      case
      when scan(/#[$@]/) then
        ss.pos -= 1 # FIX omg stupid
        return :tSTRING_DVAR, matched
      when scan(/#[{]/) then
        return :tSTRING_DBEG, matched
      when scan(/#/) then
        string_buffer << '#'
      end

      # Accumulate one line at a time until the terminator is next or an
      # interpolation interrupts the line.
      begin
        c = tokadd_string func, "\n", nil

        rb_compile_error err_msg if
          c == RubyLexer::EOF

        if c != "\n" then
          return :tSTRING_CONTENT, string_buffer.join.delete("\r")
        else
          string_buffer << scan(/\n/)
        end

        rb_compile_error err_msg if end_of_stream?
      end until check(eos_re)
    else
      # No interpolation: take whole lines verbatim.
      until check(eos_re) do
        string_buffer << scan(/.*(\n|\z)/)
        rb_compile_error err_msg if end_of_stream?
      end
    end

    self.lex_strterm = [:heredoc, eos, func, last_line]

    return :tSTRING_CONTENT, string_buffer.join.delete("\r")
  end

  ##
  # Tries to read a heredoc identifier (the part after "<<"): an optional
  # "-" followed by a quoted or bare name. On success records the heredoc
  # in lex_strterm as [:heredoc, name, func, rest_of_line] and returns a
  # :tXSTRING_BEG (backtick form) or :tSTRING_BEG token pair. Returns nil
  # when the text is not a heredoc identifier. Calls rb_compile_error for
  # an unterminated quoted identifier.

  def heredoc_identifier # TODO: remove / rewrite
    term, func = nil, STR_FUNC_BORING
    self.string_buffer = []

    case
    when scan(/(-?)([\'\"\`])(.*?)\2/) then
      # Quoted identifier: the quote character selects the string flavour.
      term = ss[2]
      func |= STR_FUNC_INDENT unless ss[1].empty?
      func |= case term
              when "\'" then
                STR_SQUOTE
              when '"' then
                STR_DQUOTE
              else
                STR_XQUOTE
              end
      string_buffer << ss[3]
    when scan(/-?([\'\"\`])(?!\1*\Z)/) then
      rb_compile_error "unterminated here document identifier"
    when scan(/(-?)(#{IDENT_CHAR}+)/) then
      # Bare identifier behaves like a double-quoted one.
      term = '"'
      func |= STR_DQUOTE
      unless ss[1].empty? then
        func |= STR_FUNC_INDENT
      end
      string_buffer << ss[2]
    else
      return nil
    end

    # Consume the rest of the current line; #heredoc pushes it back once
    # the body has been read.
    if scan(/.*\n/) then
      # TODO: think about storing off the char range instead
      line = matched
    else
      line = nil
    end

    self.lex_strterm = [:heredoc, string_buffer.join, func, line]

    if term == '`' then
      result nil, :tXSTRING_BEG, "`"
    else
      result nil, :tSTRING_BEG, "\""
    end
  end

  ##
  # True when the current lex state is :expr_fname.

  def in_fname?
    [:expr_fname].include?(lex_state)
  end

  ##
  # True when the current lex state is :expr_fname or :expr_dot.

  def in_arg_state? # TODO: rename is_after_operator?
    [:expr_fname, :expr_dot].include?(lex_state)
  end

  ##
  # True when the current lex state equals any of +states+.

  def in_lex_state?(*states)
    states.any? { |state| state == lex_state }
  end

  ##
  # Converts the most recently matched text to an integer in +base+ and
  # returns it as a :tINTEGER token, switching to :expr_end. Reports a
  # compile error if the text contains a doubled underscore.

  def int_with_base(base)
    digits = matched
    rb_compile_error "Invalid numeric format" if digits =~ /__/
    result(:expr_end, :tINTEGER, digits.to_i(base))
  end

  ##
  # True when the current lex state is :expr_arg or :expr_cmdarg.

  def is_arg?
    [:expr_arg, :expr_cmdarg].include?(lex_state)
  end

  ##
  # True when the current lex state is one of :expr_beg, :expr_value,
  # :expr_mid or :expr_class.

  def is_beg?
    [:expr_beg, :expr_value, :expr_mid, :expr_class].include?(lex_state)
  end

  ##
  # True when the current lex state is :expr_end, :expr_endarg or
  # :expr_endfn.

  def is_end?
    [:expr_end, :expr_endarg, :expr_endfn].include?(lex_state)
  end

  ##
  # Whether an identifier followed by ":" may be a label here: in
  # :expr_beg / :expr_endfn when command_state is not set, or in any
  # argument state.

  def is_label_possible?
    return true if in_lex_state?(:expr_beg, :expr_endfn) && !command_state

    is_arg?
  end

  ##
  # Truthy when in an argument state, whitespace was seen, and +c+ (the
  # upcoming character; defaults to a non-space) is not whitespace.

  def is_space_arg?(c = "x")
    is_arg? && space_seen && c !~ /\s/
  end

  ##
  # Delegates to the scanner's matched (the text of its last match).

  def matched
    ss.matched
  end

  ##
  # Negation of #is_end?.

  def not_end?
    !is_end?
  end

  ##
  # Classifies a lone "&". It is :tAMPER (argument prefix) either when in
  # an argument state with preceding whitespace and no whitespace after
  # it -- which also emits a warning -- or when in :expr_beg / :expr_mid.
  # Otherwise it is :tAMPER2. The next state comes from #arg_state.

  def process_amper(text)
    token = :tAMPER2

    if is_arg? && space_seen && !check(/\s/) then
      warning("`&' interpreted as argument prefix")
      token = :tAMPER
    elsif in_lex_state?(:expr_beg, :expr_mid) then
      token = :tAMPER
    end

    result(:arg_state, token, "&")
  end

  ##
  # Emits :tBACK_REF whose value is the scanner's first capture group as
  # a symbol; next state is :expr_end.

  def process_backref(text)
    # TODO: can't do lineno hack w/ symbol
    result(:expr_end, :tBACK_REF, ss[1].to_sym)
  end

  ##
  # Consumes an embedded document. The text just matched and everything
  # up to and including the "=end" line are appended to the collected
  # comments. Reports a compile error (after discarding the collected
  # comments) when no "=end" line follows. Returns nil.

  def process_begin(text)
    @comments << matched

    if scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m).nil? then
      @comments.clear
      rb_compile_error("embedded document meets end of file")
    end

    @comments << matched

    nil # TODO
  end

  ##
  # Handles a closing "}", "]" or ")" (taken from the last match): pops
  # the cond and cmdarg stacks, decrements the matching nesting counter,
  # sets the follow-up lex state and returns the [token, text] pair.
  # Raises for any other text.

  def process_bracing(text)
    cond.lexpop
    cmdarg.lexpop

    closer = matched

    if closer == "}" then
      self.brace_nest -= 1
      self.lex_state   = :expr_endarg
      [:tRCURLY, closer]
    elsif closer == "]" then
      self.paren_nest -= 1
      self.lex_state   = :expr_endarg
      [:tRBRACK, closer]
    elsif closer == ")" then
      self.paren_nest -= 1
      self.lex_state   = :expr_endfn
      [:tRPAREN, closer]
    else
      raise "Unknown bracing: #{closer.inspect}"
    end
  end

  ##
  # Handles a single ":". After an end state or before whitespace it is a
  # plain :tCOLON. Otherwise it starts a symbol: an opening quote, when
  # present, is consumed and recorded as a symbol string, and :tSYMBEG is
  # returned with the state set to :expr_fname.

  def process_colon1(text)
    # ?: / then / when
    return result(:expr_beg, :tCOLON, text) if is_end? || check(/\s/)

    if scan(/\'/) then
      string STR_SSYM
    elsif scan(/\"/) then
      string STR_DSYM
    end

    result(:expr_fname, :tSYMBEG, text)
  end

  ##
  # Handles "::": :tCOLON3 (state :expr_beg) at the start of an
  # expression, in :expr_class, or as a spaced argument; otherwise
  # :tCOLON2 (state :expr_dot).

  def process_colon2(text)
    leading = is_beg? || in_lex_state?(:expr_class) || is_space_arg?
    return result(:expr_beg, :tCOLON3, text) if leading

    result(:expr_dot, :tCOLON2, text)
  end

  ##
  # Handles "{". Always increments brace_nest. When a lambda is pending
  # at this paren depth it yields :tLAMBEG (clearing lpar_beg and
  # decrementing paren_nest). Otherwise the lex state selects the token:
  # :tLCURLY (block after a primary), :tLBRACE_ARG (block after an
  # expression) or :tLBRACE (hash). Anything other than :tLBRACE also
  # sets command_start.

  def process_curly_brace(text)
    self.brace_nest += 1

    if lpar_beg && lpar_beg == paren_nest then
      self.lpar_beg = nil
      self.paren_nest -= 1

      return expr_result(:tLAMBEG, "{")
    end

    token = case
            when is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
              :tLCURLY      #  block (primary)
            when in_lex_state?(:expr_endarg) then
              :tLBRACE_ARG  #  block (expr)
            else
              :tLBRACE      #  hash
            end

    self.command_start = true if token != :tLBRACE

    expr_result(token, "{")
  end

  ##
  # Emits :tFLOAT with +text+ converted via to_f (state :expr_end).
  # Reports a compile error if +text+ contains a doubled underscore.

  def process_float(text)
    rb_compile_error("Invalid numeric format") if /__/ =~ text
    result(:expr_end, :tFLOAT, text.to_f)
  end

  ##
  # Emits :tGVAR for +text+ (state :expr_end), first stamping the current
  # line number onto the string itself.

  def process_gvar(text)
    text.lineno = lineno
    result :expr_end, :tGVAR, text
  end

  ##
  # Returns a ["$", "$"] pair -- the string "$" serves as the token type --
  # and switches to :expr_end.

  def process_gvar_oddity(text)
    result(:expr_end, "$", "$") # TODO: wtf is this?
  end

  ##
  # Emits :tCVAR for text starting with "@@" and :tIVAR otherwise (state
  # :expr_end), stamping the current line number onto the string.

  def process_ivar(text)
    text.lineno = lineno
    kind = if text =~ /^@@/ then :tCVAR else :tIVAR end
    result(:expr_end, kind, text)
  end

  ##
  # Handles "<<". A heredoc identifier is attempted unless the state is
  # :expr_dot / :expr_class, an end state, or an argument state without
  # preceding whitespace. If it succeeds, its token is returned.
  # Otherwise the operator :tLSHFT is returned.

  def process_lchevron(text)
    unless in_lex_state?(:expr_dot, :expr_class) ||
           is_end? ||
           (is_arg? && !space_seen) then
      tok = heredoc_identifier
      return tok if tok
    end

    result(:arg_state, :tLSHFT, "\<\<")
  end

  ##
  # Handles a newline or the start of a "#" comment (whichever was just
  # matched). Collects consecutive comment lines into @comments, keeps
  # lineno up to date, and collapses runs of newlines. Returns nil when
  # the newline is not significant (input ended after a comment, the lex
  # state ignores newlines, or the next line starts with a single "."),
  # otherwise a :tNL token with the state set to :expr_beg.

  def process_newline_or_comment text
    c = matched
    hit = false

    if c == '#' then
      # Step back onto the "#" so the loop below can match whole comment lines.
      ss.pos -= 1

      while scan(/\s*\#.*(\n+|\z)/) do
        hit = true
        self.lineno += matched.lines.to_a.size
        @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
      end

      return nil if end_of_stream?
    end

    self.lineno += 1 unless hit

    # Replace a string of newlines with a single one
    self.lineno += matched.lines.to_a.size if scan(/\n+/)

    return if in_lex_state?(:expr_beg, :expr_value, :expr_class,
                            :expr_fname, :expr_dot)

    # A leading "." on the next line continues a method chain; a ".."
    # does not.
    if scan(/([\ \t\r\f\v]*)\./) then
      self.space_seen = true unless ss[1].empty?

      ss.pos -= 1
      return unless check(/\.\./)
    end

    self.command_start = true

    return result(:expr_beg, :tNL, nil)
  end

  ##
  # Emits :tNTH_REF whose value is the scanner's first capture group
  # converted to an integer; next state is :expr_end.

  def process_nthref(text)
    # TODO: can't do lineno hack w/ number
    result(:expr_end, :tNTH_REF, ss[1].to_i)
  end

  ##
  # Handles "(": picks the token via the 1.8 or 1.9 rules, increments
  # paren_nest and returns the token through #expr_result.

  def process_paren(text)
    token = ruby18 ? process_paren18 : process_paren19

    self.paren_nest += 1

    expr_result(token, "(")
  end

  ##
  # Ruby 1.8 rules for "(". Always sets command_start. Returns :tLPAREN
  # in :expr_beg / :expr_mid. After whitespace it returns :tLPAREN_ARG in
  # :expr_cmdarg, while :expr_arg only triggers a warning. Every other
  # case is :tLPAREN2.

  def process_paren18
    self.command_start = true

    return :tLPAREN if in_lex_state?(:expr_beg, :expr_mid)
    return :tLPAREN2 unless space_seen
    return :tLPAREN_ARG if in_lex_state?(:expr_cmdarg)

    if in_lex_state?(:expr_arg) then
      warning "don't put space before argument parentheses"
    end

    :tLPAREN2
  end

  ##
  # Ruby 1.9 rules for "(": :tLPAREN at the start of an expression,
  # :tLPAREN_ARG for a spaced argument, :tLPAREN2 otherwise.

  def process_paren19
    return :tLPAREN if is_beg?
    return :tLPAREN_ARG if is_space_arg?

    :tLPAREN2 # plain '(' in parse.y
  end

  ##
  # Handles "%". It starts a %-literal at the beginning of an expression
  # or as a spaced argument not followed by whitespace. "%=" is an
  # operator assignment. Anything else is the :tPERCENT operator.

  def process_percent(text)
    if is_beg? then
      parse_quote
    elsif scan(/\=/) then
      result(:expr_beg, :tOP_ASGN, "%")
    elsif is_arg? && space_seen && ! check(/\s/) then
      parse_quote
    else
      result(:arg_state, :tPERCENT, "%")
    end
  end

  ##
  # Handles "+" / "-" (taken from the last match).
  #
  # In a method-name position it returns the unary method name ("+@" /
  # "-@") or the plain operator. "+=" / "-=" is an operator assignment.
  # At the start of an expression, or as a spaced argument, it is unary:
  # before a digit "-" becomes :tUMINUS_NUM and "+" returns nil,
  # otherwise :tUPLUS / :tUMINUS. Everything else is binary :tPLUS /
  # :tMINUS.

  def process_plus_minus(text)
    sign = matched
    unary, binary = sign == "+" ? [:tUPLUS, :tPLUS] : [:tUMINUS, :tMINUS]

    if in_arg_state? then
      return result(:expr_arg, unary, "#{sign}@") if scan(/@/)
      return result(:expr_arg, binary, sign)
    end

    return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)

    unless is_beg? || (is_arg? && space_seen && !check(/\s/)) then
      return result(:expr_beg, binary, sign)
    end

    arg_ambiguous if is_arg?

    return result(:expr_beg, unary, sign) unless check(/\d/)
    return nil if unary == :tUPLUS

    result(:expr_beg, :tUMINUS_NUM, sign)
  end

  ##
  # Handles "?", which is either the ternary operator (:tEH) or the start
  # of a character literal.
  #
  # It is :tEH after an end state, before whitespace, or before two
  # consecutive word characters. Otherwise one character (or one escape
  # sequence) is read and returned as :tINTEGER when version is 18, or
  # as :tSTRING otherwise. Reports a compile error when "?" is the last
  # thing in the input.

  def process_questionmark text
    if is_end? then
      state = ruby18 ? :expr_beg : :expr_value # HACK?
      return result(state, :tEH, "?")
    end

    if end_of_stream? then
      rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
    end

    if check(/\s|\v/) then
      unless is_arg? then
        # matched is the whitespace character found by the check above;
        # map it to the escape letter to suggest in the warning.
        c2 = { " " => 's',
              "\n" => 'n',
              "\t" => 't',
              "\v" => 'v',
              "\r" => 'r',
              "\f" => 'f' }[matched]

        if c2 then
          warning("invalid character syntax; use ?\\" + c2)
        end
      end

      # ternary
      state = ruby18 ? :expr_beg : :expr_value # HACK?
      return result(state, :tEH, "?")
    elsif check(/\w(?=\w)/) then # ternary, also
      return result(:expr_beg, :tEH, "?")
    end

    # Character literal: either an escape sequence or a single character.
    c = if scan(/\\/) then
          self.read_escape
        else
          ss.getch
        end

    if version == 18 then
      return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
    else
      return result(:expr_end, :tSTRING, c)
    end
  end

  ##
  # Handles "/". It opens a regexp literal at the start of an expression,
  # or as a spaced argument not followed by whitespace (which also calls
  # arg_ambiguous). "/=" is an operator assignment. Anything else is the
  # :tDIVIDE operator. In the spaced-argument test a following
  # whitespace character is consumed.

  def process_slash(text)
    if is_beg? then
      string STR_REGEXP

      return result(nil, :tREGEXP_BEG, "/")
    end

    return result(:expr_beg, :tOP_ASGN, "/") if scan(/\=/)

    if is_arg? && space_seen && !scan(/\s/) then
      arg_ambiguous
      string STR_REGEXP, "/"
      return result(nil, :tREGEXP_BEG, "/")
    end

    result(:arg_state, :tDIVIDE, "/")
  end

  ##
  # Handles "[". In a method-name position it must form "[]" or "[]="
  # (returned as :tAREF / :tASET); anything else is a compile error.
  # Otherwise it is :tLBRACK at the start of an expression or as a spaced
  # argument, and :tLBRACK2 elsewhere. paren_nest is incremented unless
  # one of the method names was recognised.

  def process_square_bracket(text)
    self.paren_nest += 1

    token = if in_arg_state? then
              if scan(/\]\=/) then
                self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
                return result(:expr_arg, :tASET, "[]=")
              elsif scan(/\]/) then
                self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
                return result(:expr_arg, :tAREF, "[]")
              end

              rb_compile_error "unexpected '['"
              nil
            elsif is_beg? || (is_arg? && space_seen) then
              :tLBRACK
            else
              :tLBRACK2
            end

    expr_result(token, "[")
  end

  ##
  # Emits :tSYMBOL (state :expr_end) for the first capture of the current
  # match, with every ESC sequence replaced via #unescape. Under the 1.8
  # parser a NUL character in the name is a compile error.

  def process_symbol(text)
    name = match[1].gsub(ESC) { unescape Regexp.last_match(1) }

    if ruby18 && name =~ /\0/ then
      rb_compile_error "symbol cannot contain '\\0'"
    end

    result(:expr_end, :tSYMBOL, name)
  end

  ##
  # Classifies an identifier-like word +text+ and returns its token pair.
  #
  # A trailing "!" or "?" (not followed by "=") is appended and makes it
  # a :tFID. In :expr_fname a trailing "=" is appended (setter name).
  # Capitalised words are :tCONSTANT and everything else :tIDENTIFIER.
  # With the 1.9 parser a word followed by a single ":" where a label is
  # possible becomes :tLABEL. Outside :expr_dot, reserved words are
  # handed to #process_token_keyword.
  #
  # Note: +text+ is mutated in place (suffixes are appended and a lineno
  # is attached) and is also stored in #token.

  def process_token text
    # TODO: make this always return [token, lineno]
    token = self.token = text
    token << matched if scan(/[\!\?](?!=)/)

    tok_id =
      case
      when token =~ /[!?]$/ then
        :tFID
      when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
        # ident=, not =~ => == or followed by =>
        # TODO test lexing of a=>b vs a==>b
        token << matched
        :tIDENTIFIER
      when token =~ /^[A-Z]/ then
        :tCONSTANT
      else
        :tIDENTIFIER
      end

    if !ruby18 and is_label_possible? and scan(/:(?!:)/) then
      return result(:expr_beg, :tLABEL, [token, self.lineno])
    end

    unless in_lex_state? :expr_dot then
      # See if it is a reserved word.
      keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
                  RubyParserStuff::Keyword.keyword18 token
                else
                  RubyParserStuff::Keyword.keyword19 token
                end

      return process_token_keyword keyword if keyword
    end # unless in_lex_state? :expr_dot

    # TODO:
    # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {

    # Choose the state that follows a plain identifier.
    state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
              command_state ? :expr_cmdarg : :expr_arg
            elsif not ruby18 and in_lex_state? :expr_fname then
              :expr_endfn
            else
              :expr_end
            end

    # A name the parser's environment records as a local variable ends
    # the expression, unless the previous state was a method-name
    # position.
    if not [:expr_dot, :expr_fname].include? last_state and
        self.parser.env[token.to_sym] == :lvar then
      state = :expr_end
    end

    token.lineno = self.lineno # yes, on a string. I know... I know...

    return result(state, tok_id, token)
  end

  ##
  # Builds the token pair for a reserved word.
  #
  # +keyword+ supplies state, id0, id1 and name. In :expr_fname the
  # keyword is being used as a method name and is returned with its bare
  # name. "do" is specialised into :kDO_LAMBDA, :kDO_COND, :kDO_BLOCK or
  # :kDO from the lambda marker, the cond/cmdarg stacks and the lex
  # state. Other keywords use id0 at the start of an expression. When a
  # distinct id1 exists, id1 is returned with the state forced to
  # :expr_beg. The value is [token, lineno] except in :expr_fname.

  def process_token_keyword(keyword)
    state = keyword.state
    value = [token, lineno]

    if state == :expr_beg && lex_state != :expr_fname then
      self.command_start = true
    end

    if lex_state == :expr_fname then
      result(state, keyword.id0, keyword.name)
    elsif keyword.id0 == :kDO then
      do_token = if lpar_beg && lpar_beg == paren_nest then
                   self.lpar_beg = nil
                   self.paren_nest -= 1
                   :kDO_LAMBDA
                 elsif cond.is_in_state then
                   :kDO_COND
                 elsif (cmdarg.is_in_state && lex_state != :expr_cmdarg) ||
                       in_lex_state?(:expr_beg, :expr_endarg) then
                   :kDO_BLOCK
                 else
                   :kDO
                 end

      result(state, do_token, value)
    elsif in_lex_state?(:expr_beg, :expr_value) then
      result(state, keyword.id0, value)
    elsif keyword.id0 != keyword.id1 then
      result(:expr_beg, keyword.id1, value)
    else
      result(state, keyword.id1, value)
    end
  end

  ##
  # Handles a word starting with "_". The underscore is first put back
  # on the scanner. A line consisting of __END__ ends the input (an
  # EOF/EOF pair is returned). Otherwise the whole "_word" is handed to
  # #process_token. Returns nil if neither matches.

  def process_underscore(text)
    ss.unscan # put back "_"

    if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
      return [RubyLexer::EOF, RubyLexer::EOF]
    end

    process_token matched if scan(/\_\w*/)
  end

  ##
  # Raises RubyParser::SyntaxError with +msg+ followed by the current
  # line number and the rest of the current input line.

  def rb_compile_error(msg)
    remainder = ss.rest[/^.*/].inspect
    raise RubyParser::SyntaxError, "#{msg}. near line #{lineno}: #{remainder}"
  end

  ##
  # Reads the body of a backslash escape from the scanner (the backslash
  # itself has already been consumed) and returns the string it denotes.
  # Handles the single-letter escapes, octal and hex byte escapes, meta
  # (M-) and control (C- / c) prefixes -- recursing when the prefixed
  # character is itself an escape -- and \u code points. Reports a compile
  # error for a malformed escape or when the input ends. Any other
  # character is returned as is.
  #
  # The order of the branches matters: earlier patterns shadow later ones.

  def read_escape # TODO: remove / rewrite
    case
    when scan(/\\/) then                  # Backslash
      '\\'
    when scan(/n/) then                   # newline
      "\n"
    when scan(/t/) then                   # horizontal tab
      "\t"
    when scan(/r/) then                   # carriage-return
      "\r"
    when scan(/f/) then                   # form-feed
      "\f"
    when scan(/v/) then                   # vertical tab
      "\13"
    when scan(/a/) then                   # alarm(bell)
      "\007"
    when scan(/e/) then                   # escape
      "\033"
    when scan(/b/) then                   # backspace
      "\010"
    when scan(/s/) then                   # space
      " "
    when scan(/[0-7]{1,3}/) then          # octal constant
      (matched.to_i(8) & 0xFF).chr
    when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
      ss[1].to_i(16).chr
    when check(/M-\\[\\MCc]/) then
      # Meta applied to another escape: set the high bit of its result.
      scan(/M-\\/) # eat it
      c = self.read_escape
      c[0] = (c[0].ord | 0x80).chr
      c
    when scan(/M-(.)/) then
      c = ss[1]
      c[0] = (c[0].ord | 0x80).chr
      c
    when check(/(C-|c)\\[\\MCc]/) then
      # Control applied to another escape: mask its result with 0x9f.
      scan(/(C-|c)\\/) # eat it
      c = self.read_escape
      c[0] = (c[0].ord & 0x9f).chr
      c
    when scan(/C-\?|c\?/) then
      127.chr
    when scan(/(C-|c)(.)/) then
      c = ss[2]
      c[0] = (c[0].ord & 0x9f).chr
      c
    when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
      matched
    when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
      [ss[1].delete("{}").to_i(16)].pack("U")
    when scan(/[McCx0-9]/) || end_of_stream? then
      rb_compile_error("Invalid escape character syntax")
    else
      ss.getch
    end
  end

  ##
  # Consumes the run of lowercase letters that follows a regexp literal
  # and returns the recognised option letters (i x m o n e s u) as a
  # string. Reports a compile error naming any other letters.

  def regx_options # TODO: rewrite / remove
    letters = scan(/[a-z]+/) ? matched.split(//) : []
    good, bad = letters.partition { |s| s =~ /^[ixmonesu]$/ }

    if !bad.empty? then
      plural = bad.size > 1 ? "s" : ""
      rb_compile_error("unknown regexp option%s - %s" %
                       [plural, bad.join.inspect])
    end

    good.join
  end

  ##
  # Puts the lexer back into its initial state: counters zeroed, line
  # number 1, no pending string or lambda, empty comment list, and fresh
  # cmdarg / cond stacks. Does not touch version, parser, command_state,
  # last_state or string_buffer.

  def reset
    self.brace_nest    = 0
    self.command_start = true
    self.comments      = []
    self.lex_state     = nil
    self.lex_strterm   = nil
    self.lineno        = 1
    self.lpar_beg      = nil
    self.paren_nest    = 0
    self.space_seen    = false
    self.string_nest   = 0
    self.token         = nil
    self.extra_lineno  = 0

    self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
    self.cond   = RubyParserStuff::StackState.new(:cond)
  end

  ##
  # Returns the [token, text] pair and, when +lex_state+ is given,
  # switches the lexer to it. The pseudo-state :arg_state is replaced by
  # whatever #arg_state returns; nil leaves the state untouched.

  def result(lex_state, token, text) # :nodoc:
    next_state = lex_state == :arg_state ? arg_state : lex_state
    self.lex_state = next_state if next_state
    [token, text]
  end

  ##
  # True when the attached parser is a Ruby18Parser.

  def ruby18
    Ruby18Parser === parser
  end

  ##
  # True when the attached parser is a Ruby19Parser.

  def ruby19
    Ruby19Parser === parser
  end

  ##
  # Delegates to the scanner's scan: tries +re+ at the current position
  # and returns what the scanner returns.

  def scan(re)
    ss.scan(re)
  end

  ##
  # Returns the scanner class, RPStringScanner (defined outside this file).

  def scanner_class # TODO: design this out of oedipus_lex. or something.
    RPStringScanner
  end

  ##
  # Chooses between three token types for a prefix-capable operator:
  # +space_type+ when it reads as a spaced argument prefix (also emits a
  # warning), +beg_type+ at the start of an expression, +fallback+
  # otherwise.

  def space_vs_beginning(space_type, beg_type, fallback)
    upcoming = check(/./m)

    if is_space_arg?(upcoming) then
      warning "`**' interpreted as argument prefix"
      return space_type
    end

    # TODO: warn_balanced("**", "argument prefix");
    is_beg? ? beg_type : fallback
  end

  ##
  # Records that a string-like literal is being lexed by storing
  # [:strterm, type, beg, nnd] in lex_strterm. +beg+ defaults to the text
  # of the last match and +nnd+ to "\0".

  def string(type, beg = matched, nnd = "\0")
    self.lex_strterm = [:strterm, type, beg, nnd]
  end

  # TODO: consider
  # def src= src
  #   raise "bad src: #{src.inspect}" unless String === src
  #   @src = RPStringScanner.new(src)
  # end

  ##
  # Copies one backslash escape from the input into string_buffer
  # verbatim (backslash included), without interpreting it. A
  # backslash-newline is dropped. A meta/control prefix followed by
  # another backslash recurses to copy the nested escape. Malformed
  # escapes are reported as compile errors.

  def tokadd_escape(term) # TODO: rewrite / remove
    if scan(/\\\n/) then
      # just ignore
    elsif scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
      string_buffer << matched
    elsif scan(/\\([MC]-|c)(?=\\)/) then
      string_buffer << matched
      tokadd_escape term
    elsif scan(/\\([MC]-|c)(.)/) then
      string_buffer << matched
    elsif scan(/\\[McCx]/) then
      rb_compile_error "Invalid escape character syntax"
    elsif scan(/\\(.)/m) then
      string_buffer << matched
    else
      rb_compile_error "Invalid escape character syntax"
    end
  end

  ##
  # Accumulates the body of a string-like literal into string_buffer
  # until something interesting is reached.
  #
  # +func+ is a combination of STR_FUNC_* flags, +term+ the closing
  # delimiter and +paren+ the opening delimiter for nestable pairs (nil
  # when there is none). Scanning stops -- leaving the stop character
  # unread -- at an unnested terminator, at the start of an interpolation
  # ("#" before "$", "@" or "{" when expanding), or at whitespace in word
  # lists. Returns the last piece handled, or RubyLexer::EOF when the
  # input is exhausted.

  def tokadd_string(func, term, paren) # TODO: rewrite / remove
    qwords = (func & STR_FUNC_QWORDS) != 0
    escape = (func & STR_FUNC_ESCAPE) != 0
    expand = (func & STR_FUNC_EXPAND) != 0
    regexp = (func & STR_FUNC_REGEXP) != 0
    symbol = (func & STR_FUNC_SYMBOL) != 0

    paren_re = @@regexp_cache[paren]
    term_re  = @@regexp_cache[term]

    until end_of_stream? do
      c = nil
      handled = true

      case
      when paren_re && scan(paren_re) then
        # Nested opening delimiter.
        self.string_nest += 1
      when scan(term_re) then
        if self.string_nest == 0 then
          # Real terminator: un-read it and stop.
          ss.pos -= 1
          break
        else
          self.string_nest -= 1
        end
      when expand && scan(/#(?=[\$\@\{])/) then
        ss.pos -= 1
        break
      when qwords && scan(/\s/) then
        ss.pos -= 1
        break
      when expand && scan(/#(?!\n)/) then
        # do nothing
      when check(/\\/) then
        case
        when qwords && scan(/\\\n/) then
          string_buffer << "\n"
          next
        when qwords && scan(/\\\s/) then
          c = ' '
        when expand && scan(/\\\n/) then
          next
        when regexp && check(/\\/) then
          # Regexps keep their escapes uninterpreted.
          self.tokadd_escape term
          next
        when expand && scan(/\\/) then
          c = self.read_escape
        when scan(/\\\n/) then
          # do nothing
        when scan(/\\\\/) then
          string_buffer << '\\' if escape
          c = '\\'
        when scan(/\\/) then
          # Keep the backslash unless it escapes a delimiter.
          unless scan(term_re) || paren.nil? || scan(paren_re) then
            string_buffer << "\\"
          end
        else
          handled = false
        end # inner /\\/ case
      else
        handled = false
      end # top case

      unless handled then
        # Plain text: take the longest run free of special characters
        # (or any single character as a fallback).
        t = Regexp.escape term
        x = Regexp.escape(paren) if paren && paren != "\000"
        re = if qwords then
               if RUBY19 then
                 /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
               else
                 /[^#{t}#{x}\#\0\\\s\v]+|./ # argh. 1.8's \s doesn't pick up \v
               end
             else
               /[^#{t}#{x}\#\0\\]+|./
             end

        scan re
        c = matched

        rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
      end # unless handled

      c ||= matched
      string_buffer << c
    end # until

    c ||= matched
    c = RubyLexer::EOF if end_of_stream?

    return c
  end

  ##
  # Translates the body of a backslash escape (the text after the
  # backslash, e.g. "n", "101", "x41", "C-a", "u00e9") into the string it
  # denotes.
  #
  # Bodies listed in ESCAPES are returned straight from that table; a
  # "n" body additionally decrements extra_lineno. Otherwise octal, hex,
  # meta (M-), control (C- / c) and \u forms are decoded, and anything
  # unrecognised is returned unchanged. A malformed M/x/digit escape is
  # reported through rb_compile_error.
  #
  # When RUBY19, the decoded result is tagged as UTF-8. The argument +s+
  # itself is never modified: when the result would be +s+, a copy is
  # tagged and returned instead, so a frozen or differently-encoded
  # argument is safe to pass.

  def unescape s
    r = ESCAPES[s]

    self.extra_lineno -= 1 if r && s == "n"

    return r if r

    x = case s
        when /^[0-7]{1,3}/ then
          ($&.to_i(8) & 0xFF).chr
        when /^x([0-9a-fA-F]{1,2})/ then
          $1.to_i(16).chr
        when /^M-(.)/ then
          ($1[0].ord | 0x80).chr
        when /^(C-|c)(.)/ then
          ($2[0].ord & 0x9f).chr
        when /^[89a-f]/i then # bad octal or hex... ignore? that's what MRI does :(
          s
        when /^[McCx0-9]/ then
          rb_compile_error("Invalid escape character syntax")
        when /u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
          [$1.delete("{}").to_i(16)].pack("U")
        else
          s
        end

    # force_encoding mutates its receiver; never do that to the caller's
    # own string.
    x = x.dup if x.equal? s
    x.force_encoding "UTF-8" if RUBY19
    x
  end

  ##
  # Hook for lexer warnings. Currently discards +s+.

  def warning s
    # do nothing for now
  end

  ##
  # Lexes the next piece of the pending string literal described by
  # lex_strterm, dispatching to #heredoc or #parse_string. When the piece
  # is the closing token (:tSTRING_END or :tREGEXP_END) the pending
  # literal is cleared and the state becomes :expr_end. Returns the token
  # pair.

  def process_string # TODO: rewrite / remove
    token = if lex_strterm[0] == :heredoc then
              heredoc(lex_strterm)
            else
              parse_string(lex_strterm)
            end

    token_type = token.first

    if [:tSTRING_END, :tREGEXP_END].include?(token_type) then
      self.lex_strterm = nil
      self.lex_state   = :expr_end
    end

    token
  end

  ##
  # Lexes the opening of a %-literal (the "%" has already been consumed).
  #
  # Reads the optional type letter and the opening delimiter, works out
  # the closing delimiter, registers the literal via #string and returns
  # the opening token with its text. Reports a compile error for a
  # two-character type, an unknown type letter, or input that ends right
  # after the delimiter.

  def parse_quote # TODO: remove / rewrite
    beg, nnd, short_hand, c = nil, nil, false, nil

    if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
      rb_compile_error "unknown type of %string" if ss.matched_size == 2
      c, beg, short_hand = matched, ss.getch, false
    else                               # Short-hand (e.g. %{, %., %!, etc)
      c, beg, short_hand = 'Q', ss.getch, true
    end

    if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
      rb_compile_error "unterminated quoted string meets end of file"
    end

    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
    # Non-bracket delimiter: it closes itself, and beg becomes "\0".
    nnd, beg = beg, "\0" if nnd.nil?

    token_type, text = nil, "%#{c}#{beg}"
    token_type, string_type = case c
                              when 'Q' then
                                ch = short_hand ? nnd : c + beg
                                text = "%#{ch}"
                                [:tSTRING_BEG,   STR_DQUOTE]
                              when 'q' then
                                [:tSTRING_BEG,   STR_SQUOTE]
                              when 'W' then
                                scan(/\s*/)
                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
                              when 'w' then
                                scan(/\s*/)
                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
                              when 'x' then
                                [:tXSTRING_BEG,  STR_XQUOTE]
                              when 'r' then
                                [:tREGEXP_BEG,   STR_REGEXP]
                              when 's' then
                                self.lex_state  = :expr_fname
                                [:tSYMBEG,       STR_SSYM]
                              when 'I' then
                                scan(/\s*/)
                                [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
                              when 'i' then
                                scan(/\s*/)
                                [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
                              end

    rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
      token_type.nil?

    raise "huh" unless string_type

    # Note the argument order: the terminator is passed as #string's beg
    # and the opener as its nnd, matching how parse_string unpacks them.
    string string_type, nnd, beg

    return token_type, text
  end

  ##
  # Lexes the next piece of a non-heredoc string literal.
  #
  # +quote+ is the lex_strterm array [:strterm, type, term, open].
  # Returns one of: the closing token (:tSTRING_END, or :tREGEXP_END with
  # its options), :tSPACE between the words of a word list,
  # :tSTRING_DVAR / :tSTRING_DBEG at an interpolation start, or
  # :tSTRING_CONTENT with the accumulated text. Reports a compile error
  # if the input ends inside the literal.

  def parse_string quote # TODO: rewrite / remove
    _, string_type, term, open = quote

    space = false # FIX: remove these
    func = string_type
    paren = open
    term_re = @@regexp_cache[term]

    # When func is nil (see below) these "&" tests go through
    # NilClass#& and their results are never used.
    qwords = (func & STR_FUNC_QWORDS) != 0
    regexp = (func & STR_FUNC_REGEXP) != 0
    expand = (func & STR_FUNC_EXPAND) != 0

    unless func then # nil'ed from qwords below. *sigh*
      return :tSTRING_END, nil
    end

    space = true if qwords and scan(/\s+/)

    if self.string_nest == 0 && scan(/#{term_re}/) then
      if qwords then
        # Word lists emit one last :tSPACE; clearing the type makes the
        # next call return :tSTRING_END (see the nil check above).
        quote[1] = nil
        return :tSPACE, nil
      elsif regexp then
        return :tREGEXP_END, self.regx_options
      else
        return :tSTRING_END, term
      end
    end

    return :tSPACE, nil if space

    self.string_buffer = []

    if expand
      case
      when scan(/#(?=[$@])/) then
        return :tSTRING_DVAR, nil
      when scan(/#[{]/) then
        return :tSTRING_DBEG, nil
      when scan(/#/) then
        string_buffer << '#'
      end
    end

    if tokadd_string(func, term, paren) == RubyLexer::EOF then
      rb_compile_error "unterminated string meets end of file"
    end

    return :tSTRING_CONTENT, string_buffer.join
  end
end

require "ruby_lexer.rex"

# Debug aid, active only when the DEBUG environment variable is set:
# every assignment to lineno is echoed to stderr together with the
# file:line of the code that made it and the next 40 characters of input.
if ENV["DEBUG"] then
  class RubyLexer
    # Keep the original writer reachable under a new name.
    alias :old_lineno= :lineno=

    # Prints the inspect form of +o+ to stderr.
    def d o
      $stderr.puts o.inspect
    end

    # Wraps the original writer with a trace line.
    def lineno= n
      self.old_lineno= n
      where = caller.first.split(/:/).first(2).join(":")
      d :lineno => [n, where, ss && ss.rest[0,40]]
    end
  end
end