This file is indexed.

/usr/share/perl5/URI/Find/Schemeless.pm is in liburi-find-perl 20160806-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
# Copyright (c) 2000, 2009 Michael G. Schwern.  All rights reserved.
# This program is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.

package URI::Find::Schemeless;

use strict;
use warnings;
use base qw(URI::Find);

# base.pm error in 5.005_03 prevents it from loading URI::Find if I'm
# required first.
use URI::Find ();

# Module version.  Declared with "our" instead of the obsolete "use vars"
# pragma; "our" needs perl 5.6, which this file already requires because
# it loads the "warnings" pragma.
our $VERSION = 20160806;

# Body of a regex character class listing the characters accepted inside
# one host name label; schemeless_uri_re interpolates it as [$dnsSet].
my($dnsSet) = '\p{isAlpha}A-Za-z0-9-'; # extended for IDNA domains

# Characters that may directly follow a host name without being part of
# it: whatever cruft_set returns, plus '<', '>', '?' and '}'.
# NOTE(review): cruft_set is not defined in this file; presumably it is
# inherited from URI::Find -- confirm.
my($cruftSet) = __PACKAGE__->cruft_set . '<>?}';

# TLD regex, built once when the module is loaded.  It is looked up via
# __PACKAGE__, not via the invocant of schemeless_uri_re, so a subclass
# overriding top_level_domain_re does not change the value used here.
my($tldRe) = __PACKAGE__->top_level_domain_re;

# Character class body used for the part after the first '/' in
# schemeless_uri_re (interpolated there as [$uricSet#]).
# NOTE(review): uric_set is not defined in this file; presumably it is
# inherited from URI::Find -- confirm.
my($uricSet) = __PACKAGE__->uric_set;

=head1 NAME

URI::Find::Schemeless - Find schemeless URIs in arbitrary text.


=head1 SYNOPSIS

  require URI::Find::Schemeless;

  my $finder = URI::Find::Schemeless->new(\&callback);

  The rest is the same as URI::Find.


=head1 DESCRIPTION

URI::Find finds absolute URIs in plain text with some weak heuristics
for finding schemeless URIs.  This subclass is for finding things
which might be URIs in free text.  Things like "www.foo.com" and
"lifes.a.bitch.if.you.aint.got.net".

The heuristics are such that it hopefully finds a minimum of false
positives, but there's no easy way for it to know if "COMMAND.COM" refers
to a web site or a file.

=cut

# schemeless_uri_re
#
#   my $re = $self->schemeless_uri_re;
#
# Returns a compiled regex (qr//x) that matches something which looks like
# a URI written without a scheme.  The pattern is assembled from the
# file-level $dnsSet, $tldRe, $cruftSet and $uricSet values.  A match
#
#   * starts at the beginning of the input, or right after whitespace or
#     one of the characters < > ( ) { } [ ];
#   * consists of either dot-separated host name labels ending in a top
#     level domain accepted by $tldRe, or four dot-separated groups of one
#     to three digits;
#   * ends in one of three ways: just before whitespace or a character
#     from $cruftSet (but not before a '.' followed by a word character),
#     at the end of the input, or with a '/' followed by any run of
#     characters from $uricSet or '#'.
#
# For the first two endings the text matched so far must not end in ".pm"
# or ".pl"; the '/' ending carries no such restriction.
#
# Takes exactly one argument (the invocant).  With any other argument
# count, badinvo (not defined in this file) is invoked.
sub schemeless_uri_re {
    # Enforce "invocant only"; the invocant itself is never read.
    @_ == 1 || __PACKAGE__->badinvo;
    return qr{
              # Originally I constrained what couldn't be before the match
              # like this:  don't match email addresses, and don't start
              # anywhere but at the beginning of a host name
              #    (?<![\@.$dnsSet])
              # but I switched to saying what can be there after seeing a
              # false match of "Lite.pm" via "MIME/Lite.pm".
              (?: ^ | (?<=[\s<>()\{\}\[\]]) )
              # hostname
              (?: [$dnsSet]+(?:\.[$dnsSet]+)*\.$tldRe
                  | (?:\d{1,3}\.){3}\d{1,3} ) # not inet_aton() complete
              (?:
                  (?=[\s\Q$cruftSet\E]) # followed by unrelated thing
                  (?!\.\w)              #   but don't stop mid foo.xx.bar
                      (?<!\.p[ml])      #   but exclude Foo.pm and Foo.pl
                  |$                    # or end of line
                      (?<!\.p[ml])      #   but exclude Foo.pm and Foo.pl
                  |/[$uricSet#]*        # or slash and URI chars
              )
           }x;
}

=head3 top_level_domain_re

  my $tld_re = $self->top_level_domain_re;

Returns the regex for matching top level DNS domains.  The regex shouldn't
be anchored, it shouldn't do any capturing matches, and it should make
itself ignore case.

=cut

sub top_level_domain_re {
    # Exactly one argument (the invocant) is expected; it is not used
    # beyond this check.
    __PACKAGE__->badinvo unless @_ == 1;

    # The list below contains internationalised TLDs written as literal
    # UTF-8, so the source has to be read as characters in this scope.
    use utf8;

    # Updated from http://www.iana.org/domains/root/db/ with new TLDs
    my @tlds = qw(
        AERO
        ARPA
        ASIA
        BIZ
        CAT
        COM
        COOP
        EDU
        GOV
        INFO
        INT
        JOBS
        MIL
        MOBI
        MUSEUM
        NAME
        NET
        ORG
        PRO
        TEL
        TRAVEL
        ac
        academy
        accountants
        active
        actor
        ad
        ae
        aero
        af
        ag
        agency
        ai
        airforce
        al
        am
        an
        ao
        aq
        ar
        archi
        army
        arpa
        as
        asia
        associates
        at
        attorney
        au
        audio
        autos
        aw
        ax
        axa
        az
        ba
        bar
        bargains
        bayern
        bb
        bd
        be
        beer
        berlin
        best
        bf
        bg
        bh
        bi
        bid
        bike
        bio
        biz
        bj
        bl
        black
        blackfriday
        blue
        bm
        bmw
        bn
        bo
        boutique
        bq
        br
        brussels
        bs
        bt
        build
        builders
        buzz
        bv
        bw
        by
        bz
        bzh
        ca
        cab
        camera
        camp
        capetown
        capital
        cards
        care
        career
        careers
        cash
        cat
        catering
        cc
        cd
        center
        ceo
        cf
        cg
        ch
        cheap
        christmas
        church
        ci
        citic
        ck
        cl
        claims
        cleaning
        clinic
        clothing
        club
        cm
        cn
        co
        codes
        coffee
        college
        cologne
        com
        community
        company
        computer
        condos
        construction
        consulting
        contractors
        cooking
        cool
        coop
        country
        cr
        credit
        creditcard
        cruises
        cu
        cv
        cw
        cx
        cy
        cz
        dance
        dating
        de
        degree
        democrat
        dental
        dentist
        desi
        diamonds
        digital
        directory
        discount
        dj
        dk
        dm
        dnp
        do
        domains
        durban
        dz
        ec
        edu
        education
        ee
        eg
        eh
        email
        engineer
        engineering
        enterprises
        equipment
        er
        es
        estate
        et
        eu
        eus
        events
        exchange
        expert
        exposed
        fail
        farm
        feedback
        fi
        finance
        financial
        fish
        fishing
        fitness
        fj
        fk
        flights
        florist
        fm
        fo
        foo
        foundation
        fr
        frogans
        fund
        furniture
        futbol
        ga
        gal
        gallery
        gb
        gd
        ge
        gf
        gg
        gh
        gi
        gift
        gives
        gl
        glass
        global
        globo
        gm
        gmo
        gn
        gop
        gov
        gp
        gq
        gr
        graphics
        gratis
        green
        gripe
        gs
        gt
        gu
        guide
        guitars
        guru
        gw
        gy
        hamburg
        haus
        hiphop
        hiv
        hk
        hm
        hn
        holdings
        holiday
        homes
        horse
        host
        house
        hr
        ht
        hu
        id
        ie
        il
        im
        immobilien
        in
        industries
        info
        ink
        institute
        insure
        int
        international
        investments
        io
        iq
        ir
        is
        it
        je
        jetzt
        jm
        jo
        jobs
        joburg
        jp
        juegos
        kaufen
        ke
        kg
        kh
        ki
        kim
        kitchen
        kiwi
        km
        kn
        koeln
        kp
        kr
        kred
        kw
        ky
        kz
        la
        land
        lawyer
        lb
        lc
        lease
        li
        life
        lighting
        limited
        limo
        link
        lk
        loans
        london
        lotto
        lr
        ls
        lt
        lu
        luxe
        luxury
        lv
        ly
        ma
        maison
        management
        mango
        market
        marketing
        mc
        md
        me
        media
        meet
        menu
        mf
        mg
        mh
        miami
        mil
        mini
        mk
        ml
        mm
        mn
        mo
        mobi
        moda
        moe
        monash
        mortgage
        moscow
        motorcycles
        mp
        mq
        mr
        ms
        mt
        mu
        museum
        mv
        mw
        mx
        my
        mz
        na
        nagoya
        name
        navy
        nc
        ne
        net
        neustar
        nf
        ng
        nhk
        ni
        ninja
        nl
        no
        np
        nr
        nu
        nyc
        nz
        okinawa
        om
        onl
        org
        organic
        ovh
        pa
        paris
        partners
        parts
        pe
        pf
        pg
        ph
        photo
        photography
        photos
        physio
        pics
        pictures
        pink
        pk
        pl
        plumbing
        pm
        pn
        post
        pr
        press
        pro
        productions
        properties
        ps
        pt
        pub
        pw
        py
        qa
        qpon
        quebec
        re
        recipes
        red
        rehab
        reise
        reisen
        ren
        rentals
        repair
        report
        republican
        rest
        reviews
        rich
        rio
        ro
        rocks
        rodeo
        rs
        ru
        ruhr
        rw
        ryukyu
        sa
        saarland
        sb
        sc
        schule
        scot
        sd
        se
        services
        sexy
        sg
        sh
        shiksha
        shoes
        si
        singles
        sj
        sk
        sl
        sm
        sn
        so
        social
        software
        sohu
        solar
        solutions
        soy
        space
        sr
        ss
        st
        su
        supplies
        supply
        support
        surf
        surgery
        sv
        sx
        sy
        systems
        sz
        tattoo
        tax
        tc
        td
        technology
        tel
        tf
        tg
        th
        tienda
        tips
        tirol
        tj
        tk
        tl
        tm
        tn
        to
        today
        tokyo
        tools
        town
        toys
        tp
        tr
        trade
        training
        travel
        tt
        tv
        tw
        tz
        ua
        ug
        uk
        um
        university
        uno
        us
        uy
        uz
        va
        vacations
        vc
        ve
        vegas
        ventures
        versicherung
        vet
        vg
        vi
        viajes
        villas
        vision
        vlaanderen
        vn
        vodka
        vote
        voting
        voto
        voyage
        vu
        wang
        watch
        webcam
        website
        wed
        wf
        wien
        wiki
        works
        ws
        wtc
        wtf
        测试
           परीक्षा
        集团
        在线
        한국
         ভারত
        موقع
         বাংলা
        公益
        公司
        移动
        我爱你
        москва
        испытание
        қаз
        онлайн
        сайт
        срб
        테스트
        орг
        삼성
          சிங்கப்பூர்
        商标
        商城
        дети
        мкд
        טעסט
        中文网
        中信
        中国
        中國
                     భారత్
               ලංකා
        測試
                ભારત
           भारत
        آزمایشی
           பரிட்சை
           संगठन
        网络
        укр
        香港
        δοκιμή
        إختبار
        台湾
        台灣
        мон
        الجزائر
        عمان
        ایران
        امارات
        بازار
        پاکستان
        الاردن
        بھارت
        المغرب
        السعودية
        سودان
        مليسيا
        شبكة
        გე
        机构
        组织机构
                     ไทย
        سورية
        рф
        تونس
        みんな
        世界
                     ਭਾਰਤ
        网址
        游戏
        مصر
        قطر
          இலங்கை
          இந்தியா
        新加坡
        فلسطين
        テスト
        政务
        xxx
        xyz
        yachts
        ye
        yokohama
        yt
        za
        zm
        zone
        zw
    );

    # One unanchored, non-capturing, case-insensitive alternation over
    # every entry, in list order.
    my $alternation = join '|', @tlds;
    return qr/(?:$alternation)/i;
}

=head1 AUTHOR

Original code by Roderick Schertler <roderick@argon.org>, adapted by
Michael G Schwern <schwern@pobox.com>.

Currently maintained by Roderick Schertler <roderick@argon.org>.

=head1 SEE ALSO

L<URI::Find>

=cut

1;