; File: /usr/share/gnudatalanguage/astrolib/match.pro

pro match, a, b, suba, subb, COUNT = count, SORT = sort, epsilon=epsilon
;+
; NAME:
;       MATCH
; PURPOSE:
;       Routine to match values in two vectors.
;
; CALLING SEQUENCE:
;       match, a, b, suba, subb, [ COUNT =, /SORT, EPSILON =  ]
;
; INPUTS:
;       a,b - two vectors to match elements, numeric or string data types
;
; OUTPUTS:
;       suba - subscripts of elements in vector a with a match
;               in vector b
;       subb - subscripts of the positions of the elements in
;               vector b with matches in vector a.
;
;       suba and subb are ordered such that a[suba] equals b[subb]
;
;       When there are no matches:
;         - the histogram (integer) method sets suba and subb to !NULL,
;           or to -1 when !VERSION.RELEASE is below '8.0';
;         - the sort method (non-integer data, or /SORT) sets suba and
;           subb to -1.
;
; OPTIONAL INPUT KEYWORD:
;       /SORT - By default, MATCH uses two different algorithms: (1) the
;               /REVERSE_INDICES keyword to HISTOGRAM is used for integer data,
;               while (2) a sorting algorithm is used for non-integer data.  The
;               histogram algorithm is usually faster, except when the input
;               vectors are sparse and contain very large numbers, possibly
;               causing memory problems.   Use the /SORT keyword to always use
;               the sort algorithm.
;       epsilon - values that differ by less than epsilon are considered
;               equal.  Used only by the sort algorithm (i.e. for non-integer
;               data or when /SORT is set); the histogram algorithm ignores
;               it.  Note that input vectors should be unique to within
;               epsilon to provide a one-to-one mapping.
;               Default=0 (exact equality).  If the caller passes an
;               undefined variable for this keyword it is set to 0.
;
; OPTIONAL KEYWORD OUTPUT:
;       COUNT - set to the number of matches, integer scalar
;
; SIDE EFFECTS:
;       The histogram method assigns the obsolete system variable !ERR = -1
;       before searching; this routine does not assign !ERR anywhere else.
;       Use the COUNT keyword to obtain the number of matches.
;
;       Any error raised inside the routine is caught, reported through
;       cgErrorMsg(/quiet), and the routine returns without completing.
;
; RESTRICTIONS:
;       The vectors a and b should not have duplicate values within them.
;       You can use rem_dup function to remove duplicate values
;       in a vector
;
; EXAMPLE:
;       If a = [3,5,7,9,11]   & b = [5,6,7,8,9,10]
;       then
;               IDL> match, a, b, suba, subb, COUNT = count
;
;       will give suba = [1,2,3], subb = [0,2,4],  COUNT = 3
;       and       a[suba] = b[subb] = [5,7,9]
;
;
; METHOD:
;       For non-integer data types, the two input vectors are combined and
;       sorted and the consecutive equal elements are identified.   For integer
;       data types, the /REVERSE_INDICES keyword to HISTOGRAM of each array
;       is used to identify where the two arrays have elements in common.
; HISTORY:
;       D. Lindler  Mar. 1986.
;       Fixed "indgen" call for very large arrays   W. Landsman  Sep 1991
;       Added COUNT keyword    W. Landsman   Sep. 1992
;       Fixed case where single element array supplied   W. Landsman Aug 95
;       Use a HISTOGRAM algorithm for integer vector inputs for improved
;             performance                W. Landsman         March 2000
;       Work again for strings           W. Landsman         April 2000
;       Use size(/type)                  W. Landsman         December 2002
;       Work for scalar integer input    W. Landsman         June 2003
;       Assume since V5.4, use COMPLEMENT to WHERE() W. Landsman Apr 2006
;       Added epsilon keyword            Kim Tolbert         March 14, 2008
;       Fix bug with Histogram method with all negative values W. Landsman/
;             R. Gutermuth, return !NULL for no matches  November 2017
;       Honor EPSILON when either input has a single element; compare each
;             sorted element only with its successor (no wrap-around from
;             the last element to the first)
;-
;-------------------------------------------------------------------------
 compile_opt idl2

; Trap any runtime error: report it quietly and return to the caller.
  Catch, theError
  IF theError NE 0 then begin
     Catch,/Cancel
     void = cgErrorMsg(/quiet)
     RETURN
     ENDIF


 if N_elements(epsilon) EQ 0 then epsilon = 0

 if N_params() LT 3 then begin
     print,'Syntax - match, a, b, suba, subb, [ COUNT =, EPSILON=, /SORT]'
     print,'    a,b -- input vectors for which to match elements'
     print,'    suba,subb -- output subscript vectors of matched elements'
     return
 endif

; Choose the algorithm.  The histogram method is used only when both inputs
; have a type code of 3 or less, or of 12 or more, and /SORT is not set.

 da = size(a,/type) & db =size(b,/type)
 if keyword_set(sort) then hist = 0b else $
 hist = (( da LE 3 ) || (da GE 12)) &&  ((db LE 3) || (db GE 12 ))

 if ~hist then begin           ;Non-integer calculation

 na = N_elements(a)              ;number of elements in a
 nb = N_elements(b)             ;number of elements in b

; Check for a single element array.  The comparison uses the same test as
; the general case below: exact equality when epsilon is 0, otherwise
; "differs by less than epsilon".

 if (na EQ 1) || (nb EQ 1) then begin
        if (nb GT 1) then begin
                if epsilon eq 0. then subb = where(b EQ a[0], nw) $
                else subb = where(abs(b - a[0]) lt epsilon, nw)
                if (nw GT 0) then suba = replicate(0,nw) else suba = [-1]
        endif else begin
                if epsilon eq 0. then suba = where(a EQ b[0], nw) $
                else suba = where(abs(a - b[0]) lt epsilon, nw)
                if (nw GT 0) then subb = replicate(0,nw) else subb = [-1]
        endelse
        count = nw
        return
 endif

 c = [ a, b ]                   ;combined list of a and b
 ind = [ lindgen(na), lindgen(nb) ]       ;combined list of indices
 vec = [ bytarr(na), replicate(1b,nb) ]  ;flag of which vector in  combined
                                         ;list   0 - a   1 - b

; sort combined list

 sub = sort(c)
 c = c[sub]
 ind = ind[sub]
 vec = vec[sub]

; find duplicates in sorted combined list
;
; Each element is compared only with its successor, so element n-1 is never
; a "first duplicate".  (A circular SHIFT would also compare the last element
; with the first, and firstdup+1 would then point one past the end of the
; list.)  Both inputs have at least 2 elements here, so n-2 is a valid
; subscript.

 n = na + nb                            ;total elements in c
 lo = c[0:n-2]                          ;each element ...
 hi = c[1:n-1]                          ;... and its successor
 differ = vec[0:n-2] NE vec[1:n-1]      ;pair comes from different vectors?
 if epsilon eq 0. then $
    firstdup = where( (lo EQ hi) and differ, Count ) $
 else $
    firstdup = where( (abs(lo - hi) lt epsilon) and differ, Count )

 if Count EQ 0 then begin               ;any found?
        suba = lonarr(1)-1
        subb = lonarr(1)-1
        return
 end

; Interleave each first duplicate with its successor, then separate the
; subscripts again according to which input vector they came from.

 dup = lonarr( Count*2 )                     ;both duplicate values
 even = lindgen( N_elements(firstdup))*2     ;Changed to LINDGEN 6-Sep-1991
 dup[even] = firstdup
 dup[even+1] = firstdup+1
 ind = ind[dup]                         ;indices of duplicates
 vec = vec[dup]                         ;vector id of duplicates
 subb = ind[ where( vec, complement = vzero) ]             ;b subscripts
 suba = ind[ vzero]

 endif else begin             ;Integer calculation using histogram.

 minab = min(a, MAX=maxa) > min(b, MAX=maxb) ;Only need intersection of ranges
 maxab = maxa < maxb

;If either set is empty, or their ranges don't intersect:
;  result = NULL (which is denoted by integer = -1)
;The "no match" outputs are preset here and overwritten below on success.
  !ERR = -1
  if !VERSION.RELEASE GE '8.0' then begin
        suba = !NULL
        subb = !NULL
  endif else begin
        suba = -1
        subb = -1
  endelse
  COUNT = 0L
 if maxab lt minab then return       ;No overlap

; Histogram both inputs over the common range; a value is matched when its
; bin is populated in both histograms.

 ha = histogram([a], MIN=minab, MAX=maxab, reverse_indices=reva)
 hb = histogram([b], MIN=minab, MAX=maxab, reverse_indices=revb)

 r = where((ha ne 0) and (hb ne 0), count)

; reva[reva[r]] is the first subscript stored for each matched bin.

 if count gt 0 then begin
  suba = reva[reva[r]]
  subb = revb[revb[r]]
 endif
 endelse

 return

 end
; Source: gdl-astrolib 2018.02.16+dfsg-1 / usr / share / gnudatalanguage / astrolib / match.pro