/usr/lib/python3/dist-packages/ldif.py

"""
ldif - generate and parse LDIF data (see RFC 2849)

See https://www.python-ldap.org/ for details.
"""

from __future__ import unicode_literals

__version__ = '3.0.0'

__all__ = [
  # constants
  'ldif_pattern',
  # functions
  'CreateLDIF','ParseLDIF',
  # classes
  'LDIFWriter',
  'LDIFParser',
  'LDIFRecordList',
  'LDIFCopy',
]

import re
from base64 import b64encode, b64decode
from io import StringIO
import warnings

from ldap.compat import urlparse, urlopen

attrtype_pattern = r'[\w;.-]+(;[\w_-]+)*'
attrvalue_pattern = r'(([^,]|\\,)+|".*?")'
attrtypeandvalue_pattern = attrtype_pattern + r'[ ]*=[ ]*' + attrvalue_pattern
rdn_pattern   = attrtypeandvalue_pattern + r'([ ]*\+[ ]*' + attrtypeandvalue_pattern + r')*[ ]*'
dn_pattern   = rdn_pattern + r'([ ]*,[ ]*' + rdn_pattern + r')*[ ]*'
dn_regex   = re.compile('^%s$' % dn_pattern)

ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % vars()

MOD_OP_INTEGER = {
  'add':0, # ldap.MOD_ADD
  'delete':1, # ldap.MOD_DELETE
  'replace':2, # ldap.MOD_REPLACE
  'increment':3, # ldap.MOD_INCREMENT
}

MOD_OP_STR = {
  0:'add',1:'delete',2:'replace',3:'increment'
}

CHANGE_TYPES = ['add','delete','modify','modrdn']
valid_changetype_dict = {}
for c in CHANGE_TYPES:
  valid_changetype_dict[c]=None


def is_dn(s):
  """
  returns 1 if s is a LDAP DN
  """
  if s=='':
    return 1
  rm = dn_regex.match(s)
  return rm!=None and rm.group(0)==s


SAFE_STRING_PATTERN = b'(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)'
safe_string_re = re.compile(SAFE_STRING_PATTERN)

def list_dict(l):
  """
  return a dictionary with all items of l being the keys of the dictionary
  """
  return {i: None for i in l}


class LDIFWriter:
  """
  Write LDIF entry or change records to file object
  Copy LDIF input to a file output object containing all data retrieved
  via URLs
  """

  def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'):
    """
    output_file
        file object for output; should be opened in *text* mode
    base64_attrs
        list of attribute types to be base64-encoded in any case
    cols
        Specifies how many columns a line may have before it's
        folded into many lines.
    line_sep
        String used as line separator
    """
    self._output_file = output_file
    self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
    self._cols = cols
    self._last_line_sep = line_sep
    self.records_written = 0

  def _unfold_lines(self,line):
    """
    Write string line as one or more folded lines
    """
    # Check maximum line length
    line_len = len(line)
    if line_len<=self._cols:
      self._output_file.write(line)
      self._output_file.write(self._last_line_sep)
    else:
      # Fold line
      pos = self._cols
      self._output_file.write(line[0:min(line_len,self._cols)])
      self._output_file.write(self._last_line_sep)
      while pos<line_len:
        self._output_file.write(' ')
        self._output_file.write(line[pos:min(line_len,pos+self._cols-1)])
        self._output_file.write(self._last_line_sep)
        pos = pos+self._cols-1
    return # _unfold_lines()

  def _needs_base64_encoding(self,attr_type,attr_value):
    """
    returns 1 if attr_value has to be base-64 encoded because
    of special chars or because attr_type is in self._base64_attrs
    """
    return attr_type.lower() in self._base64_attrs or \
           not safe_string_re.search(attr_value) is None

  def _unparseAttrTypeandValue(self,attr_type,attr_value):
    """
    Write a single attribute type/value pair

    attr_type
          attribute type (text)
    attr_value
          attribute value (bytes)
    """
    if self._needs_base64_encoding(attr_type,attr_value):
      # Encode with base64
      encoded = b64encode(attr_value).decode('ascii')
      encoded = encoded.replace('\n','')
      self._unfold_lines(':: '.join([attr_type, encoded]))
    else:
      self._unfold_lines(': '.join([attr_type, attr_value.decode('ascii')]))
    return # _unparseAttrTypeandValue()

  def _unparseEntryRecord(self,entry):
    """
    entry
        dictionary holding an entry
    """
    for attr_type in sorted(entry.keys()):
      for attr_value in entry[attr_type]:
        self._unparseAttrTypeandValue(attr_type,attr_value)

  def _unparseChangeRecord(self,modlist):
    """
    modlist
        list of additions (2-tuple) or modifications (3-tuple)
    """
    mod_len = len(modlist[0])
    if mod_len==2:
      changetype = 'add'
    elif mod_len==3:
      changetype = 'modify'
    else:
      raise ValueError("modlist item of wrong length: %d" % (mod_len))
    self._unparseAttrTypeandValue('changetype',changetype.encode('ascii'))
    for mod in modlist:
      if mod_len==2:
        mod_type,mod_vals = mod
      elif mod_len==3:
        mod_op,mod_type,mod_vals = mod
        self._unparseAttrTypeandValue(MOD_OP_STR[mod_op],
                                      mod_type.encode('ascii'))
      else:
        raise ValueError("Subsequent modlist item of wrong length")
      if mod_vals:
        for mod_val in mod_vals:
          self._unparseAttrTypeandValue(mod_type,mod_val)
      if mod_len==3:
        self._output_file.write('-'+self._last_line_sep)

  def unparse(self,dn,record):
    """
    dn
          string-representation of distinguished name
    record
          Either a dictionary holding the LDAP entry {attrtype:record}
          or a list with a modify list like for LDAPObject.modify().
    """
    # Start with line containing the distinguished name
    dn = dn.encode('utf-8')
    self._unparseAttrTypeandValue('dn', dn)
    # Dispatch to record type specific writers
    if isinstance(record,dict):
      self._unparseEntryRecord(record)
    elif isinstance(record,list):
      self._unparseChangeRecord(record)
    else:
      raise ValueError('Argument record must be dictionary or list instead of %s' % (repr(record)))
    # Write empty line separating the records
    self._output_file.write(self._last_line_sep)
    # Count records written
    self.records_written = self.records_written+1
    return # unparse()


def CreateLDIF(dn,record,base64_attrs=None,cols=76):
  """
  Create LDIF single formatted record including trailing empty line.
  This is a compatibility function.

  dn
        string-representation of distinguished name
  record
        Either a dictionary holding the LDAP entry {attrtype:record}
        or a list with a modify list like for LDAPObject.modify().
  base64_attrs
        list of attribute types to be base64-encoded in any case
  cols
        Specifies how many columns a line may have before it's
        folded into many lines.
  """
  warnings.warn(
    'ldif.CreateLDIF() is deprecated. Use LDIFWriter.unparse() instead. It '
    'will be removed in python-ldap 3.1',
    category=DeprecationWarning,
    stacklevel=2,
  )
  f = StringIO()
  ldif_writer = LDIFWriter(f,base64_attrs,cols,'\n')
  ldif_writer.unparse(dn,record)
  s = f.getvalue()
  f.close()
  return s


class LDIFParser:
  """
  Base class for a LDIF parser. Applications should sub-class this
  class and override method handle() to implement something meaningful.

  Public class attributes:

  records_read
        Counter for records processed so far
  """

  def __init__(
    self,
    input_file,
    ignored_attr_types=None,
    max_entries=0,
    process_url_schemes=None,
    line_sep='\n'
  ):
    """
    Parameters:
    input_file
        File-object to read the LDIF input from
    ignored_attr_types
        Attributes with these attribute type names will be ignored.
    max_entries
        If non-zero specifies the maximum number of entries to be
        read from f.
    process_url_schemes
        List containing strings with URLs schemes to process with urllib.
        An empty list turns off all URL processing and the attribute
        is ignored completely.
    line_sep
        String used as line separator
    """
    self._input_file = input_file
    # Detect whether the file is open in text or bytes mode.
    self._file_sends_bytes = isinstance(self._input_file.read(0), bytes)
    self._max_entries = max_entries
    self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
    self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
    self._last_line_sep = line_sep
    self.version = None
    # Initialize counters
    self.line_counter = 0
    self.byte_counter = 0
    self.records_read = 0
    self.changetype_counter = {}.fromkeys(CHANGE_TYPES,0)
    # Store some symbols for better performance
    self._b64decode = b64decode
    # Read very first line
    try:
      self._last_line = self._readline()
    except EOFError:
      self._last_line = ''

  def handle(self,dn,entry):
    """
    Process a single content LDIF record. This method should be
    implemented by applications using LDIFParser.
    """
    pass

  def _readline(self):
    s = self._input_file.readline()
    if self._file_sends_bytes:
      # The RFC does not allow UTF-8 values; we support it as a
      # non-official, backwards compatibility layer
      s = s.decode('utf-8')
    self.line_counter = self.line_counter + 1
    self.byte_counter = self.byte_counter + len(s)
    if not s:
      return None
    elif s[-2:]=='\r\n':
      return s[:-2]
    elif s[-1:]=='\n':
      return s[:-1]
    else:
      return s

  def _unfold_lines(self):
    """
    Unfold several folded lines with trailing space into one line
    """
    if self._last_line is None:
      raise EOFError('EOF reached after %d lines (%d bytes)' % (
        self.line_counter,
        self.byte_counter,
      ))
    unfolded_lines = [ self._last_line ]
    next_line = self._readline()
    while next_line and next_line[0]==' ':
      unfolded_lines.append(next_line[1:])
      next_line = self._readline()
    self._last_line = next_line
    return ''.join(unfolded_lines)

  def _next_key_and_value(self):
    """
    Parse a single attribute type and value pair from one or
    more lines of LDIF data

    Returns attr_type (text) and attr_value (bytes)
    """
    # Reading new attribute line
    unfolded_line = self._unfold_lines()
    # Ignore comments which can also be folded
    while unfolded_line and unfolded_line[0]=='#':
      unfolded_line = self._unfold_lines()
    if not unfolded_line:
      return None,None
    if unfolded_line=='-':
      return '-',None
    try:
      colon_pos = unfolded_line.index(':')
    except ValueError as e:
      raise ValueError('no value-spec in %s' % (repr(unfolded_line)))
    attr_type = unfolded_line[0:colon_pos]
    # if needed attribute value is BASE64 decoded
    value_spec = unfolded_line[colon_pos:colon_pos+2]
    if value_spec==': ':
      attr_value = unfolded_line[colon_pos+2:].lstrip()
      # All values should be valid ascii; we support UTF-8 as a
      # non-official, backwards compatibility layer.
      attr_value = attr_value.encode('utf-8')
    elif value_spec=='::':
      # attribute value needs base64-decoding
      # base64 makes sens only for ascii
      attr_value = unfolded_line[colon_pos+2:]
      attr_value = attr_value.encode('ascii')
      attr_value = self._b64decode(attr_value)
    elif value_spec==':<':
      # fetch attribute value from URL
      url = unfolded_line[colon_pos+2:].strip()
      attr_value = None
      if self._process_url_schemes:
        u = urlparse(url)
        if u[0] in self._process_url_schemes:
          attr_value = urlopen(url).read()
    else:
      # All values should be valid ascii; we support UTF-8 as a
      # non-official, backwards compatibility layer.
      attr_value = unfolded_line[colon_pos+1:].encode('utf-8')
    return attr_type,attr_value

  def _consume_empty_lines(self):
    """
    Consume empty lines until first non-empty line.
    Must only be used between full records!

    Returns non-empty key-value-tuple.
    """
    # Local symbol for better performance
    next_key_and_value = self._next_key_and_value
    # Consume empty lines
    try:
      k,v = next_key_and_value()
      while k is None and v is None:
        k,v = next_key_and_value()
    except EOFError:
      k,v = None,None
    return k,v

  def parse_entry_records(self):
    """
    Continuously read and parse LDIF entry records
    """
    # Local symbol for better performance
    next_key_and_value = self._next_key_and_value

    try:
      # Consume empty lines
      k,v = self._consume_empty_lines()
      # Consume 'version' line
      if k=='version':
        self.version = int(v.decode('ascii'))
        k,v = self._consume_empty_lines()
    except EOFError:
      return

    # Loop for processing whole records
    while k!=None and \
          (not self._max_entries or self.records_read<self._max_entries):
      # Consume first line which must start with "dn: "
      if k!='dn':
        raise ValueError('Line %d: First line of record does not start with "dn:": %s' % (self.line_counter,repr(k)))
      # Value of a 'dn' field *has* to be valid UTF-8
      # k is text, v is bytes.
      v = v.decode('utf-8')
      if not is_dn(v):
        raise ValueError('Line %d: Not a valid string-representation for dn: %s.' % (self.line_counter,repr(v)))
      dn = v
      entry = {}
      # Consume second line of record
      k,v = next_key_and_value()

      # Loop for reading the attributes
      while k!=None:
        # Add the attribute to the entry if not ignored attribute
        if not k.lower() in self._ignored_attr_types:
          try:
            entry[k].append(v)
          except KeyError:
            entry[k]=[v]
        # Read the next line within the record
        try:
          k,v = next_key_and_value()
        except EOFError:
          k,v = None,None

      # handle record
      self.handle(dn,entry)
      self.records_read = self.records_read + 1
      # Consume empty separator line(s)
      k,v = self._consume_empty_lines()
    return # parse_entry_records()

  def parse(self):
    """
    Invokes LDIFParser.parse_entry_records() for backward compatibility
    """
    return self.parse_entry_records() # parse()

  def handle_modify(self,dn,modops,controls=None):
    """
    Process a single LDIF record representing a single modify operation.
    This method should be implemented by applications using LDIFParser.
    """
    controls = [] or None
    pass

  def parse_change_records(self):
    # Local symbol for better performance
    next_key_and_value = self._next_key_and_value
    # Consume empty lines
    k,v = self._consume_empty_lines()
    # Consume 'version' line
    if k=='version':
      self.version = int(v)
      k,v = self._consume_empty_lines()

    # Loop for processing whole records
    while k!=None and \
          (not self._max_entries or self.records_read<self._max_entries):
      # Consume first line which must start with "dn: "
      if k!='dn':
        raise ValueError('Line %d: First line of record does not start with "dn:": %s' % (self.line_counter,repr(k)))
      # Value of a 'dn' field *has* to be valid UTF-8
      # k is text, v is bytes.
      v = v.decode('utf-8')
      if not is_dn(v):
        raise ValueError('Line %d: Not a valid string-representation for dn: %s.' % (self.line_counter,repr(v)))
      dn = v
      # Consume second line of record
      k,v = next_key_and_value()
      # Read "control:" lines
      controls = []
      while k!=None and k=='control':
        # v is still bytes, spec says it should be valid utf-8; decode it.
        v = v.decode('utf-8')
        try:
          control_type,criticality,control_value = v.split(' ',2)
        except ValueError:
          control_value = None
          control_type,criticality = v.split(' ',1)
        controls.append((control_type,criticality,control_value))
        k,v = next_key_and_value()

      # Determine changetype first
      changetype = None
      # Consume changetype line of record
      if k=='changetype':
        # v is still bytes, spec says it should be valid utf-8; decode it.
        v = v.decode('utf-8')
        if not v in valid_changetype_dict:
          raise ValueError('Invalid changetype: %s' % repr(v))
        changetype = v
        k,v = next_key_and_value()

      if changetype=='modify':

        # From here we assume a change record is read with changetype: modify
        modops = []

        try:
          # Loop for reading the list of modifications
          while k!=None:
            # Extract attribute mod-operation (add, delete, replace)
            try:
              modop = MOD_OP_INTEGER[k]
            except KeyError:
              raise ValueError('Line %d: Invalid mod-op string: %s' % (self.line_counter,repr(k)))
            # we now have the attribute name to be modified
            # v is still bytes, spec says it should be valid utf-8; decode it.
            v = v.decode('utf-8')
            modattr = v
            modvalues = []
            try:
              k,v = next_key_and_value()
            except EOFError:
              k,v = None,None
            while k==modattr:
              modvalues.append(v)
              try:
                k,v = next_key_and_value()
              except EOFError:
                k,v = None,None
            modops.append((modop,modattr,modvalues or None))
            k,v = next_key_and_value()
            if k=='-':
              # Consume next line
              k,v = next_key_and_value()
        except EOFError:
          k,v = None,None

        if modops:
          # append entry to result list
          self.handle_modify(dn,modops,controls)

      else:

        # Consume the unhandled change record
        while k!=None:
          k,v = next_key_and_value()

      # Consume empty separator line(s)
      k,v = self._consume_empty_lines()

      # Increment record counters
      try:
        self.changetype_counter[changetype] = self.changetype_counter[changetype] + 1
      except KeyError:
        self.changetype_counter[changetype] = 1
      self.records_read = self.records_read + 1

    return # parse_change_records()


class LDIFRecordList(LDIFParser):
  """
  Collect all records of a LDIF file. It can be a memory hog!

  Records are stored in :attr:`.all_records` as a single list
  of 2-tuples (dn, entry), after calling :meth:`.parse`.
  """

  def __init__(
    self,
    input_file,
    ignored_attr_types=None,max_entries=0,process_url_schemes=None
  ):
    LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)

    #: List storing parsed records.
    self.all_records = []
    self.all_modify_changes = []

  def handle(self,dn,entry):
    """
    Append a single record to the list of all records (:attr:`.all_records`).
    """
    self.all_records.append((dn,entry))

  def handle_modify(self,dn,modops,controls=None):
    """
    Process a single LDIF record representing a single modify operation.
    This method should be implemented by applications using LDIFParser.
    """
    controls = [] or None
    self.all_modify_changes.append((dn,modops,controls))


class LDIFCopy(LDIFParser):
  """
  Copy LDIF input to LDIF output containing all data retrieved
  via URLs
  """

  def __init__(
    self,
    input_file,output_file,
    ignored_attr_types=None,max_entries=0,process_url_schemes=None,
    base64_attrs=None,cols=76,line_sep='\n'
  ):
    """
    See LDIFParser.__init__() and LDIFWriter.__init__()
    """
    LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
    self._output_ldif = LDIFWriter(output_file,base64_attrs,cols,line_sep)

  def handle(self,dn,entry):
    """
    Write single LDIF record to output file.
    """
    self._output_ldif.unparse(dn,entry)


def ParseLDIF(f,ignore_attrs=None,maxentries=0):
  """
  Parse LDIF records read from file.
  This is a compatibility function.
  """
  warnings.warn(
    'ldif.ParseLDIF() is deprecated. Use LDIFRecordList.parse() instead. It '
    'will be removed in python-ldap 3.1',
    category=DeprecationWarning,
    stacklevel=2,
  )
  ldif_parser = LDIFRecordList(
    f,ignored_attr_types=ignore_attrs,max_entries=maxentries,process_url_schemes=0
  )
  ldif_parser.parse()
  return ldif_parser.all_records
python3-ldap 3.0.0-1 / usr / lib / python3 / dist-packages / ldif.py