# /usr/lib/python3/dist-packages/pyutilib/misc/comparison.py

#  _________________________________________________________________________
#
#  PyUtilib: A Python utility library.
#  Copyright (c) 2008 Sandia Corporation.
#  This software is distributed under the BSD License.
#  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
#  the U.S. Government retains certain rights in this software.
#  _________________________________________________________________________

import re
import copy
import sys
import os
import os.path
import difflib
import zipfile
import gzip
import filecmp
import math
# Python 3 compatibility: alias the removed ``xrange`` built-in to ``range``
# and import ``io``, which is used to wrap compressed binary streams as text.
if sys.version_info >= (3,0):
    xrange = range
    import io

# Patterns that recognize numeric literals: an optional sign, digits with an
# optional fractional part (or a bare ".ddd"), and an optional exponent.
#
# The strict pattern additionally refuses to match when the number is
# immediately preceded by a character in the lookbehind class, and requires
# a word boundary after the number.
# NOTE(review): inside the lookbehind class, ``+-\.`` is a character *range*
# from '+' to '.', which also covers ','.  Confirm whether rejecting numbers
# that directly follow a comma is intended.
strict_float_p = re.compile(r"(?<![\w+-\.])(?:[+-])?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\b")
# The relaxed pattern matches the same numeric syntax anywhere in the text.
relaxed_float_p = re.compile(r"(?:[+-])?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?")
# One or more consecutive space characters (tabs are not matched).
whitespace_p = re.compile(r" +")

def remove_chars_in_list(s, l):
    """
    Return `s` with every character that occurs in `l` dropped.

    `l` may be any container supporting ``len()`` and ``in`` (a list of
    single characters or a string).  When `l` is empty, `s` is returned
    unchanged.
    """
    if len(l) > 0:
        kept = [ch for ch in s if ch not in l]
        return "".join(kept)

    return s


def get_desired_chars_from_file(f, nchars, l=""):
    """
    Read from `f` until `nchars` characters that are not in `l` have been
    collected, or until the stream is exhausted.

    Characters listed in `l` are discarded and do not count toward
    `nchars`.  The collected characters are returned as a single string,
    which is shorter than `nchars` only when the end of the stream is
    reached first.
    """
    pieces = []
    remaining = nchars
    while remaining > 0:
        chunk = f.read(remaining)
        if len(chunk) == 0:
            # End of stream
            break

        kept = remove_chars_in_list(chunk, l)
        remaining -= len(kept)
        pieces.append(kept)

    return "".join(pieces)


# MyGzipFile is only defined when running under Python 3.2, where
# open_possibly_compressed_file() uses it in place of gzip.open().
if sys.version_info[:2] == (3,2):
    #
    # This fixes a bug in Python 3.2's implementation of GzipFile.
    #
    class MyGzipFile(gzip.GzipFile):
        """GzipFile subclass whose read1() simply delegates to read()."""
        def read1(self, n):
            return self.read(n)
 
def open_possibly_compressed_file(filename):
    """
    Open a plain, gzip-compressed, or single-member ZIP file for reading.

    The file type is detected from the content (ZIP) or from a '.gz'
    suffix (gzip); anything else is opened as a regular text file.  Under
    Python 3 the compressed streams are wrapped so that they yield UTF-8
    decoded text with newlines left untranslated.

    Returns an open file-like object; the caller is responsible for
    closing it.

    Raises IOError if the file does not exist, if a ZIP archive does not
    contain exactly one member, or if a ZIP archive is given to a Python
    version older than 2.6.
    """
    if not os.path.exists(filename):
        raise IOError("cannot find file `"+filename+"'")
    if zipfile.is_zipfile(filename):
        if sys.version_info[:2] < (2,6):
            raise IOError( "cannot unpack a ZIP file with Python %s"
                           % '.'.join(map(str,sys.version_info)) )
        archive = zipfile.ZipFile(filename,"r")
        try:
            members = archive.namelist()
            if len(members) != 1:
                raise IOError("cannot compare with a zip file that contains "
                              "multiple files: `"+filename+"'")
            member = archive.open(members[0],'r')
        except Exception:
            # Do not leave the archive handle open when we are not going to
            # hand a member stream back to the caller.
            archive.close()
            raise
        if sys.version_info < (3,0):
            return member
        return io.TextIOWrapper(member, encoding='utf-8', newline='')
    if filename.endswith('.gz'):
        if sys.version_info < (3,0):
            return gzip.open(filename,"r")
        if sys.version_info[:2] == (3,2):
            # Python 3.2 needs the patched GzipFile defined in this module
            return io.TextIOWrapper(MyGzipFile(filename), encoding='utf-8', newline='')
        return io.TextIOWrapper(gzip.open(filename,'r'), encoding='utf-8', newline='')
    return open(filename,"r")


def file_diff(filename1, filename2, lineno=None, context=None):
    """
    Return a unified diff (as a single string) going from `filename2` to
    `filename1`.

    Every line is stripped of leading and trailing whitespace before the
    comparison.  Either file may be plain, gzip-compressed or a
    single-member ZIP archive.

    If `lineno` is given, only a window of lines around that line is
    diffed: it starts `context` lines before `lineno` and extends `context`
    lines past it (`context` defaults to 3).  The window is applied to both
    files independently, so when one file is shorter than the other the
    extra lines of the longer file still show up in the diff.

    Each line produced by difflib is followed by an additional newline.
    An empty string means the compared lines are identical.
    """
    INPUT1=open_possibly_compressed_file(filename1)
    try:
        lines1 = [line.strip() for line in INPUT1.readlines()]
    finally:
        INPUT1.close()

    INPUT2=open_possibly_compressed_file(filename2)
    try:
        lines2 = [line.strip() for line in INPUT2.readlines()]
    finally:
        INPUT2.close()

    if lineno is not None:
        if context is None:
            context = 3
        # A negative start would index from the end of the list, so clamp
        # it.  The stop index needs no clamping: slicing past the end of a
        # list is safe, and clamping it to the shorter file would hide the
        # lines that exist in only one of the files.
        start = max(lineno-context, 0)
        stop = lineno+context
        lines1 = lines1[start:stop]
        lines2 = lines2[start:stop]

    return "".join(
        line+"\n"
        for line in difflib.unified_diff(lines2, lines1, fromfile=filename2, tofile=filename1))


def read_and_filter_line(stream, ignore_chars, filter):
    """
    Read lines from `stream` until one survives filtering.

    A line is skipped when it is empty once the characters in
    `ignore_chars` are removed; this lets blank lines that appear in only
    one of two files be passed over.  If `filter` is not None it is called
    with the raw line: a result of True skips the line, False keeps the
    line with the ignored characters removed, and any other result
    replaces the line (an empty replacement causes the next line to be
    read).

    Returns (line, count), where `count` is the number of readline() calls
    made, including the one that hit the end of the stream.  `line` is
    None at the end of the stream.
    """
    consumed = 0
    while True:
        raw = stream.readline()
        consumed += 1
        if raw == "":
            # End of stream
            return None, consumed

        stripped = remove_chars_in_list(raw, ignore_chars)
        if not stripped:
            # Nothing but ignorable characters: try the next line
            continue

        if filter is None:
            return stripped, consumed

        verdict = filter(raw)
        if verdict is True:
            # The filter asked us to ignore this line
            continue
        if verdict is False:
            candidate = stripped
        else:
            candidate = verdict
        if candidate:
            return candidate, consumed


def _numeric_lines_differ(line1, line2, float_p, tolerance):
    """
    Return True if two filtered lines differ when their numbers are
    compared within `tolerance` and the remaining text is compared exactly
    (after collapsing runs of spaces).
    """
    floats1 = float_p.findall(line1)
    floats2 = float_p.findall(line2)
    if len(floats1) != len(floats2):
        return True

    for text1, text2 in zip(floats1, floats2):
        if text1 == text2:
            continue
        try:
            v1 = float(text1)
            v2 = float(text2)
        except ValueError:
            return True
        if math.fabs(v1-v2) > tolerance:
            return True

    # Mask every number so that only the surrounding text is compared
    masked1 = float_p.sub('#', whitespace_p.sub(' ', line1.strip()))
    masked2 = float_p.sub('#', whitespace_p.sub(' ', line2.strip()))
    return masked1 != masked2


def compare_file_with_numeric_values(filename1, filename2, ignore=["\n","\r"], filter=None, tolerance=0.0, strict_numbers=True):
    """
    Do a simple comparison of two files that ignores differences
    in newline types and whitespace.  Numeric values are compared within a
    specified tolerance.

    The return value is the list [status, lineno, diff].  If status is
    True, then a difference has occurred on the specified line number (as
    counted in filename1) and diff is the text returned by file_diff()
    around that line.  If the status is False, then lineno is None and
    diff is the empty string.

    If 'strict_numbers' is true, numbers are recognized with
    strict_float_p; otherwise relaxed_float_p is used.  The 'filter'
    function, if given, is applied to each line as described in
    read_and_filter_line().

    An IOError is raised if either file does not exist, and a RuntimeError
    is raised if a file cannot be decoded.

    The goal of this utility is to simply indicate whether there are
    differences in files.  The Python 'difflib' is much more comprehensive
    and consequently more costly to apply.  The shutil.filecmp utility is
    similar, but it does not ignore differences in file newlines.  Also,
    this utility can ignore an arbitrary set of characters.
    """
    for name in (filename1, filename2):
        if not os.path.exists(name):
            raise IOError("compare_file: cannot find file `"+name+
                          "' (in "+os.getcwd()+")")

    # Cheap shortcut: files that filecmp considers equal need no parsing
    if filecmp.cmp(filename1, filename2):
        return [False, None, ""]

    if strict_numbers:
        float_p = strict_float_p
    else:
        float_p = relaxed_float_p

    # The try/finally blocks guarantee that both streams are closed on
    # every exit path, including the decoding errors raised below.
    INPUT1=open_possibly_compressed_file(filename1)
    try:
        INPUT2=open_possibly_compressed_file(filename2)
        try:
            lineno=0
            while True:
                # Blank (fully ignorable) lines are skipped by
                # read_and_filter_line, so lines that are blank in only one
                # file do not count as a difference.
                try:
                    line1, delta_lineno = read_and_filter_line(INPUT1, ignore, filter)
                except UnicodeDecodeError:
                    err = sys.exc_info()[1]
                    raise RuntimeError("Decoding error while processing file %s: %s" % (filename1, str(err)))
                lineno += delta_lineno
                try:
                    line2 = read_and_filter_line(INPUT2, ignore, filter)[0]
                except UnicodeDecodeError:
                    err = sys.exc_info()[1]
                    raise RuntimeError("Decoding error while processing file %s: %s" % (filename2, str(err)))

                if line1 is None and line2 is None:
                    # Both files ended without a difference
                    return [False, None, ""]

                if line1 is None or line2 is None:
                    # One file ended before the other
                    break

                if _numeric_lines_differ(line1, line2, float_p, tolerance):
                    break
        finally:
            INPUT2.close()
    finally:
        INPUT1.close()

    return [True, lineno, file_diff(filename1,filename2, lineno=lineno)]
                        

def compare_file(filename1,filename2, ignore=["\t"," ","\n","\r"], filter=None, tolerance=None):
    """
    Do a simple comparison of two files that ignores differences
    in newline types.  If filename1 or filename2 is a zipfile, then it is
    assumed to contain a single file.

    The return value is the list [status, lineno, diff].  If status is
    True, then a difference has occurred on the specified line number (as
    counted in filename1) and diff is the text returned by file_diff()
    around that line.  If the status is False, then lineno is None and
    diff is the empty string.

    The goal of this utility is to simply indicate whether there are
    differences in files.  The Python 'difflib' is much more comprehensive
    and consequently more costly to apply.  The shutil.filecmp utility is
    similar, but it does not ignore differences in file newlines.  Also,
    this utility can ignore an arbitrary set of characters.

    The 'filter' function evaluates each line separately.  If it returns True,
    then that line should be ignored.  If it returns a string, then that string replaces
    the line.

    If 'tolerance' is not None, the comparison is delegated to
    compare_file_with_numeric_values().  'tolerance' is then either a
    number, or a (tolerance, strict_numbers) pair.  Spaces and tabs are
    removed from 'ignore' for that comparison.

    An IOError is raised if either file does not exist.
    """
    if tolerance is not None:
        # Spaces and tabs must stay significant in a numeric comparison.
        # Filtering (rather than list.remove) also works when the caller's
        # 'ignore' does not contain them.
        numeric_ignore = [ch for ch in ignore if ch not in (' ', '\t')]
        try:
            tol, strict = tolerance
        except (TypeError, ValueError):
            # Not a two-item sequence: treat it as the tolerance itself
            tol = tolerance
            strict = True
        return compare_file_with_numeric_values(filename1, filename2, ignore=numeric_ignore, filter=filter, tolerance=tol, strict_numbers=strict)

    for name in (filename1, filename2):
        if not os.path.exists(name):
            raise IOError("compare_file: cannot find file `"+name+
                          "' (in "+os.getcwd()+")")

    # The try/finally blocks guarantee that both streams are closed on
    # every exit path.
    INPUT1 = open_possibly_compressed_file(filename1)
    try:
        INPUT2 = open_possibly_compressed_file(filename2)
        try:
            #
            # This check is deferred until the zipfiles are set up to ensure
            # a consistent logic for zipfile analysis.  If the files are the
            # same, but they are zipfiles with > 1 files, then we raise an
            # exception.
            #
            if not sys.platform.startswith('win') and os.stat(filename1) == os.stat(filename2):
                return [False, None, ""]

            lineno=0
            while True:
                # Blank (fully ignorable) lines are skipped by
                # read_and_filter_line, so lines that are blank in only one
                # file do not count as a difference.
                line1, delta_lineno = read_and_filter_line(INPUT1, ignore, filter)
                lineno += delta_lineno
                line2 = read_and_filter_line(INPUT2, ignore, filter)[0]

                if line1 is None and line2 is None:
                    # Both files ended without a difference
                    return [False, None, ""]

                # Either one file ended early or the filtered lines differ
                if line1 is None or line2 is None or line1 != line2:
                    break
        finally:
            INPUT2.close()
    finally:
        INPUT1.close()

    return [True, lineno, file_diff(filename1,filename2, lineno=lineno)]


def compare_large_file(filename1,filename2, ignore=["\t"," ","\n","\r"], bufSize=1 * 1024 * 1024):
    """
    Do a simple comparison of two files that ignores white space, or
    characters specified in "ignore" list.

    The return value is True if a difference is found, False otherwise.

    For very long text files, this function will be faster than
    compare_file() because it reads the files in by large chunks
    instead of by line.  The cost is that you don't get the lineno
    at which the difference occurs.

    'bufSize' is the number of non-ignored characters compared per chunk.
    """
    # The try/finally blocks guarantee that both streams are closed on
    # every exit path.
    INPUT1 = open_possibly_compressed_file(filename1)
    try:
        INPUT2 = open_possibly_compressed_file(filename2)
        try:
            #
            # This check is deferred until the zipfiles are set up to ensure
            # a consistent logic for zipfile analysis.  If the files are the
            # same, but they are zipfiles with > 1 files, then we raise an
            # exception.
            #
            if not sys.platform.startswith('win') and os.stat(filename1) == os.stat(filename2):
                return False

            while True:
                buf1 = get_desired_chars_from_file(INPUT1, bufSize, ignore)
                buf2 = get_desired_chars_from_file(INPUT2, bufSize, ignore)

                # Covers differing content as well as one file ending first
                if buf1 != buf2:
                    return True
                # Equal and empty: both files are exhausted
                if len(buf1) == 0:
                    return False
        finally:
            INPUT2.close()
    finally:
        INPUT1.close()
# python3-pyutilib 5.3.5-1 / usr / lib / python3 / dist-packages / pyutilib / misc / comparison.py