/usr/share/pyshared/gitdb/db/loose.py

# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import (
						FileDBBase, 
						ObjectDBR, 
						ObjectDBW
				)


from gitdb.exc import (
	InvalidDBRoot, 
	BadObject,
	AmbiguousObjectName
	)

from gitdb.stream import (
		DecompressMemMapReader,
		FDCompressedSha1Writer,
		FDStream,
		Sha1Writer
	)

from gitdb.base import (
							OStream,
							OInfo
						)

from gitdb.util import (
		file_contents_ro_filepath,
		ENOENT,
		hex_to_bin,
		bin_to_hex,
		exists,
		chmod,
		isdir,
		isfile,
		remove,
		mkdir,
		rename,
		dirname,
		basename,
		join
	)

from gitdb.fun import ( 
	chunk_size,
	loose_object_header_info, 
	write_object,
	stream_copy
	)

import tempfile
import mmap
import sys
import os


__all__ = ( 'LooseObjectDB', )


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
	"""A database which operates on loose object files"""
	
	# CONFIGURATION
	# chunks in which data will be copied between streams
	stream_chunk_size = chunk_size
	
	# On windows we need to keep it writable, otherwise it cannot be removed
	# either
	new_objects_mode = 0444
	if os.name == 'nt':
		new_objects_mode = 0644
			
	
	def __init__(self, root_path):
		super(LooseObjectDB, self).__init__(root_path)
		self._hexsha_to_file = dict()
		# Additional Flags - might be set to 0 after the first failure
		# Depending on the root, this might work for some mounts, for others not, which
		# is why it is per instance
		self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
	
	#{ Interface 
	def object_path(self, hexsha):
		"""
		:return: path at which the object with the given hexsha would be stored, 
			relative to the database root"""
		return join(hexsha[:2], hexsha[2:])
	
	def readable_db_object_path(self, hexsha):
		"""
		:return: readable object path to the object identified by hexsha
		:raise BadObject: If the object file does not exist"""
		try:
			return self._hexsha_to_file[hexsha]
		except KeyError:
			pass
		# END ignore cache misses 
			
		# try filesystem
		path = self.db_path(self.object_path(hexsha))
		if exists(path):
			self._hexsha_to_file[hexsha] = path
			return path
		# END handle cache
		raise BadObject(hexsha)
		
	def partial_to_complete_sha_hex(self, partial_hexsha):
		""":return: 20 byte binary sha1 string which matches the given name uniquely
		:param name: hexadecimal partial name
		:raise AmbiguousObjectName: 
		:raise BadObject: """
		candidate = None
		for binsha in self.sha_iter():
			if bin_to_hex(binsha).startswith(partial_hexsha):
				# it can't ever find the same object twice
				if candidate is not None:
					raise AmbiguousObjectName(partial_hexsha)
				candidate = binsha
		# END for each object
		if candidate is None:
			raise BadObject(partial_hexsha)
		return candidate
		
	#} END interface
	
	def _map_loose_object(self, sha):
		"""
		:return: memory map of that file to allow random read access
		:raise BadObject: if object could not be located"""
		db_path = self.db_path(self.object_path(bin_to_hex(sha)))
		try:
			return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
		except OSError,e:
			if e.errno != ENOENT:
				# try again without noatime
				try:
					return file_contents_ro_filepath(db_path)
				except OSError:
					raise BadObject(sha)
				# didn't work because of our flag, don't try it again
				self._fd_open_flags = 0
			else:
				raise BadObject(sha)
			# END handle error
		# END exception handling
		try:
			return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
		finally:
			os.close(fd)
		# END assure file is closed
		
	def set_ostream(self, stream):
		""":raise TypeError: if the stream does not support the Sha1Writer interface"""
		if stream is not None and not isinstance(stream, Sha1Writer):
			raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
		return super(LooseObjectDB, self).set_ostream(stream)
			
	def info(self, sha):
		m = self._map_loose_object(sha)
		try:
			type, size = loose_object_header_info(m)
			return OInfo(sha, type, size)
		finally:
			m.close()
		# END assure release of system resources
		
	def stream(self, sha):
		m = self._map_loose_object(sha)
		type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
		return OStream(sha, type, size, stream)
		
	def has_object(self, sha):
		try:
			self.readable_db_object_path(bin_to_hex(sha))
			return True
		except BadObject:
			return False
		# END check existance
	
	def store(self, istream):
		"""note: The sha we produce will be hex by nature"""
		tmp_path = None
		writer = self.ostream()
		if writer is None:
			# open a tmp file to write the data to
			fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
			
			if istream.binsha is None:
				writer = FDCompressedSha1Writer(fd)
			else:
				writer = FDStream(fd)
			# END handle direct stream copies
		# END handle custom writer
	
		try:
			try:
				if istream.binsha is not None:
					# copy as much as possible, the actual uncompressed item size might
					# be smaller than the compressed version
					stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
				else:
					# write object with header, we have to make a new one
					write_object(istream.type, istream.size, istream.read, writer.write,
									chunk_size=self.stream_chunk_size)
				# END handle direct stream copies
			finally:
				if tmp_path:
					writer.close()
			# END assure target stream is closed
		except:
			if tmp_path:
				os.remove(tmp_path)
			raise
		# END assure tmpfile removal on error
		
		hexsha = None
		if istream.binsha:
			hexsha = istream.hexsha
		else:
			hexsha = writer.sha(as_hex=True)
		# END handle sha
		
		if tmp_path:
			obj_path = self.db_path(self.object_path(hexsha))
			obj_dir = dirname(obj_path)
			if not isdir(obj_dir):
				mkdir(obj_dir)
			# END handle destination directory
			# rename onto existing doesn't work on windows
			if os.name == 'nt' and isfile(obj_path):
				remove(obj_path)
			# END handle win322
			rename(tmp_path, obj_path)
			
			# make sure its readable for all ! It started out as rw-- tmp file
			# but needs to be rwrr
			chmod(obj_path, self.new_objects_mode)
		# END handle dry_run
		
		istream.binsha = hex_to_bin(hexsha)
		return istream
		
	def sha_iter(self):
		# find all files which look like an object, extract sha from there
		for root, dirs, files in os.walk(self.root_path()):
			root_base = basename(root)
			if len(root_base) != 2:
				continue
				
			for f in files:
				if len(f) != 38:
					continue
				yield hex_to_bin(root_base + f)
			# END for each file
		# END for each walk iteration
		
	def size(self):
		return len(tuple(self.sha_iter()))
python-gitdb 0.5.4-1 / usr / share / pyshared / gitdb / db / loose.py