diff --git a/gmailfs.py b/gmailfs.py index 33b484f..39c8b64 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -120,6 +120,10 @@ UserConfigFile = abspath(expanduser("~/.gmailfs.conf")) GMAILFS_VERSION = '5' +PATHNAME_MAX = 256 + +DELETE_AFTER_READ = 1 +KEEP_AFTER_READ = 0 PathStartDelim = '__a__' PathEndDelim = '__b__' @@ -160,6 +164,8 @@ rsp_cache = {} debug = 1 +if "DEBUG" in os.environ: + debug = int(os.environ['DEBUG']) if debug >= 3: imaplib.Debug = 3 #imaplib.Debug = 4 @@ -254,12 +260,19 @@ def log_warning(str): return def parse_path(path): - # should we check that there's always a / in the path?? - ind = string.rindex(path, '/') - parent_dir = path[:ind] - filename = path[ind+1:] + try: + # rindex excepts when there's no / + ind = string.rindex(path, '/') + parent_dir = path[:ind] + filename = path[ind+1:] + except: + print("parse_path() exception") + ind = 0 + parent_dir = "" + filename = path if len(parent_dir) == 0: parent_dir = "/" + log_debug4("parse_path('%s') parent_dir: '%s', filename: '%s'" % (path, parent_dir, filename)) return parent_dir, filename @@ -325,16 +338,17 @@ def imap_uid(imap, cmd, arg1, arg2 = None, arg3 = None, arg4 = None): tries = tries - 1 try: ret = imap.uid(cmd, arg1, arg2, arg3) + if not ret == None: + return ret; except Exception, e: log_error("imap.uid() error: %s (tries left: %d)" % (str(e), tries)) - imap.fs.kick_imap(imap) if tries <= 0: - raise + abort() except: log_error("imap.uid() unknown error: (tries left: %d)" % (tries)) - imap.fs.kick_imap(imap) if tries <= 0: - raise + abort() + imap.fs.kick_imap(imap) return ret def __imap_append(imap, fsNameVar, flags, now, msg): @@ -350,25 +364,25 @@ def __imap_append(imap, fsNameVar, flags, now, msg): time.sleep(1) rsp = None continue - except RuntimeError, e: - log_error("imap.append() error: %s" % (str(e))) - imap.fs.kick_imap(imap) + except: + log_error("imap.append() exception: '%s' (tries left: %d)" % (sys.exc_info()[0], tries)) if tries <= 0: - raise + abort() + imap.fs.kick_imap(imap) return rsp, data def imap_getquotaroot(imap, fsNameVar): - tries = 2 + tries = 3 ret = None while ret == None: + tries = tries - 1 try: ret = imap.getquotaroot(fsNameVar) - except RuntimeError, e: - log_error("imap.getquotaroot() error: %s" % (str(e))) + except: + log_error("imap.getquotaroot() error: %s" % sys.exc_info()[0]) imap.fs.kick_imap(imap) if tries <= 0: - raise - tries = tries - 1 + abort() return ret # The IMAP uid commands can take multiple uids and return @@ -379,6 +393,11 @@ def imap_getquotaroot(imap, fsNameVar): # # does python have a ... operator like c preprocessor? def uid_cmd(imap, cmd, uids, arg1, arg2 = None, arg3 = None): + # there's something funky going on with gmail. It seems to not synchronize + # bewtween different IMAP connections. You might ask for all the messages + # in two threads and get different responses. Running imap.select() seems + # to synchronize it again. + imap.select(fsNameVar) semget(imap.lock) ret = __uid_cmd(imap, cmd, uids, arg1, arg2, arg3) imap.lock.release() @@ -545,24 +564,26 @@ def __init__ (self, fs, nr): self.nr = nr def write_out_object(self): - try: - # block, and timeout after 1 second - object = self.fs.dirty_objects.get(1, 1) - except: - # effectively success if we timeout + object = self.fs.get_dirty_object() + if object == None: + # the queues are empty, so all is good + time.sleep(1) return 0 # we do not want to sit here sleeping on objects # so if we can not get the lock, move on to another # object got_lock = object.writeout_lock.acquire(0) + log_debug3("write out got_lock: '%s' obj dirty: %s" % (str(got_lock), str(object.dirty()))) if not got_lock: - self.fs.dirty_objects.put(object) + dont_block = 1 + self.fs.queue_dirty(object, dont_block) return -1 sem_msg[object.writeout_lock] = "acquired write_out_object()" reason = Dirtyable.dirty_reason(object) start = time.time() ret = write_out_nolock(object, "bdflushd") end = time.time() + log_debug3("write out about to releaselock") object.writeout_lock.release() sem_msg[object.writeout_lock] += " released write_out_object()" size = self.fs.dirty_objects.qsize() @@ -685,7 +706,7 @@ def _getMsguidsByQuery(about, imap, queries, or_query = 0): except: log_error("IMAP error on SEARCH") log_error("queryString: ->%s<-" % (queryString)) - print "\nIMAP exception ", sys.exc_info()[0] + print "\nIMAP exception, exiting", sys.exc_info()[0] exit(-1) finally: imap.lock.release() @@ -780,14 +801,14 @@ def getSingleMessageByQuery(desc, imap, q): return fetch_full_message(imap, msgid) def _pathSeparatorEncode(path): - s1 = re.sub("/","__fs__",path) - s2 = re.sub("-","__mi__",s1) - return re.sub("\+","__pl__",s2) + #s1 = re.sub("/","__fs__",path) + #s2 = re.sub("-","__mi__",s1) + return re.sub("\+","__pl__",path) def _pathSeparatorDecode(path): - s1 = re.sub("__fs__","/",path) - s2 = re.sub("__mi__","-",s1) - return re.sub("__pl__","+",s2) + #s1 = re.sub("__fs__","/",path) + #s2 = re.sub("__mi__","-",s1) + return re.sub("__pl__","+",path) def _logException(msg): @@ -859,16 +880,16 @@ def clear_dirty(self, nr): log_info("cleared original dirty reason: '%s'" % (orig_reason)) return msg - def mark_dirty(self, desc): + def mark_dirty(self, desc, can_block = 1): self.__dirty = desc self.dirty_reasons.put(desc) try: - self.dirty_mark.put_nowait(desc); - self.fs.dirty_objects.put(self) + self.dirty_mark.put_nowait(desc) + self.fs.queue_dirty(self, can_block) except: log_debug("mark_dirty('%s') skipped global list, already dirty" % (self.to_str())) - log_debug1("mark_dirty('%s') because '%s' (%d reasons)" % - (self.to_str(), desc, self.dirty_reasons.qsize())) + log_debug1("mark_dirty('%s') because '%s' (%d reasons, %d total)" % + (self.to_str(), desc, self.dirty_reasons.qsize(), self.fs.nr_dirty_objects())) def to_str(self): return "Dirtyable.to_str()" @@ -910,7 +931,7 @@ def d_write_out(self, desc): raise e return 0 - def unlink(self): + def d_unlink(self): # FIXME, don't allow directory unlinking when children log_debug1("unlink path:"+self.path()+" with nlinks:"+str(self.inode.i_nlink)) if self.inode.mode & S_IFDIR: @@ -924,11 +945,26 @@ def unlink(self): to_trash = self.inode.dec_nlink() to_trash.append(str(self.dirent_msg.uid)) if len(to_trash): + for uid in to_trash: + log_debug1("unlink() going to trash uid: %s" % (uid)) imap_trash_uids(self.fs.imap, to_trash) + semget(self.fs.lookup_lock) + # this ensures that the (now dead) dentry will never get written out + while (self.dirty() > 0): + dirty_token = self.dirty() + print "d_unlink() dirty token: '%s'" % (dirty_token) + self.clear_dirty(dirty_token) deleted = self.fs.dirent_cache.pop(self.path()) if deleted != None and deleted != self: - log_error("removed wrong dirent from cache") + log_error("[%s] removed wrong dirent from cache self: %s" % (str(thread.get_ident()), str(self))) + log_error("\tmy path: '%s' uid: '%s' obj: %s" % (self.path(), str(self.dirent_msg.uid), str(self))) + log_error("\tdl path: '%s' uid: '%s' obj: %s" % (deleted.path(), str(deleted.dirent_msg.uid), str(deleted))) + self.fs.lookup_lock.release() + parentdir, name = parse_path(self.path()) + parentdirinode = self.fs.lookup_inode(parentdir) + parentdirinode.i_nlink -= 1 + parentdirinode.mark_dirty("d_unlink() for parent dir") #@-node:class GmailDirent @@ -991,10 +1027,10 @@ def __init__(self, inode_msg, fs): def to_str(self): return "inode(%s)" % (str(self.ino)) - def mark_dirty(self, desc): + def mark_dirty(self, desc, can_block = 1): log_debug2("inode mark_dirty(%s) size: '%s'" % (desc, str(self.size))) self.mtime = int(time.time()) - Dirtyable.mark_dirty(self, desc) + Dirtyable.mark_dirty(self, desc, can_block) def i_write_out(self, desc): log_debug2("i_write_out() self: '%s'" % (self)) @@ -1003,7 +1039,8 @@ def i_write_out(self, desc): value = self.xattr[attr] payload_name = 'xattr-'+attr log_debug1("adding xattr payload named '%s': '%s'" % (payload_name, value)) - msg_add_payload(msg, value, payload_name) + msg_add_payload(self.inode_msg, value, payload_name) + log_debug3("i_write_out() self.dirty: '%s' desc: '%s'" % (Dirtyable.dirty_reason(self), desc)) # remember where this is in case we have to delete it i_orig_uid = self.inode_msg.uid # because this wipes it out @@ -1076,10 +1113,18 @@ def dec_nlink(self): return [] log_debug2("truncating inode") subject = 'b='+str(self.ino)+'' + # either wait until it is fully written out + got_lock = self.writeout_lock.acquire() + # or make sure that it never is + while (self.dirty() > 0): + dirty_token = self.dirty() + self.clear_dirty(dirty_token) + block_uids = _getMsguidsByQuery("unlink blocks", self.fs.imap, [subject]) to_trash = [] to_trash.extend(block_uids) to_trash.append(str(self.inode_msg.uid)) + self.writeout_lock.release() return to_trash def fill_from_inode_msg(self): @@ -1222,7 +1267,7 @@ def __init__(self, inode, block_nr): self.start_offset = self.block_nr * self.block_size self.end_offset = self.start_offset + self.block_size self.ts = time.time() - log_debug1("created new block: %d" % (self.block_nr)) + log_debug1("created new GmailBlock: %d for inode: %d" % (self.block_nr, inode.ino)) gmail_blocks[self] = self def to_str(self): @@ -1265,7 +1310,7 @@ def write(self, buf, off): self.buffer = list(" "*self.block_size) self.buffer_lock.release() else: - self.populate_buffer(1) + self.populate_buffer(DELETE_AFTER_READ) buf_write_start = file_off - self.start_offset buf_write_end = buf_write_start + len(buf_part) @@ -1285,7 +1330,7 @@ def write(self, buf, off): self.buffer_lock.release() log_debug1("wrote block range: [%d:%d]" % (buf_write_start, buf_write_end)) - log_debug1("block write() setting dirty") + log_debug1("block write() setting GmailBlock dirty") self.mark_dirty("file write") if file_off + len(buf_part) > self.inode.size: @@ -1311,7 +1356,7 @@ def b_write_out(self, desc): self.buffer_lock.release() if self.inode.size / self.block_size == self.block_nr: part = self.inode.size % self.block_size - print("on last block, so only writing out %d/%d bytes of block" % (part, len(buf))) + log_debug2("on last block, so only writing out %d/%d bytes of block" % (part, len(buf))) buf = buf[:part] arr = array.array('c') @@ -1328,7 +1373,12 @@ def b_write_out(self, desc): log_debug("b_write_out() finished, rsp: '%s'" % str(msgid)) if msgid > 0: log_debug("Sent write commit ok") - self.inode.mark_dirty("commit data block") + # This is a special case. This b_write_out() happens in a worker thread, + # and if we block it waiting on dirty data to be written out, we may end + # up deadlocking. So, put the inode on a dirty list, but do not block + # doing it. + can_block = 0 + self.inode.mark_dirty("commit data block", can_block) tmpf.close() ret = 0 else: @@ -1341,7 +1391,7 @@ def read(self, readlen, file_off): readlen = min(self.inode.size - file_off, readlen) log_debug1("read block: %d" % (self.block_nr)) - self.populate_buffer(1) + self.populate_buffer(KEEP_AFTER_READ) start_offset = max(file_off, self.start_offset) end_offset = min(file_off + readlen, self.end_offset) start_offset -= self.start_offset @@ -1359,7 +1409,7 @@ def populate_buffer(self, deleteAfter): if len(self.buffer): self.buffer_lock.release() return - log_debug1("populate_buffer() filling block %d because len: %d" % (self.block_nr, len(self.buffer))) + log_debug2("populate_buffer() filling block %d because len: %d" % (self.block_nr, len(self.buffer))) q1 = 'b='+str(self.inode.ino) q2 = 'x='+str(self.block_nr) @@ -1382,12 +1432,13 @@ def populate_buffer(self, deleteAfter): log_debug3("after loop, a: '%s'" % str(a)) a = list(a) - if deleteAfter: + if deleteAfter == DELETE_AFTER_READ: + log_debug1("populate_buffer() deleting msg: '%s'" % (msg.uid)); imap_trash_msg(self.inode.fs.imap, msg) contentList = list(" "*self.block_size) contentList[0:] = a self.buffer = contentList - print("populate_buffer() filled block %d with len: %d" % (self.block_nr, len(self.buffer))) + log_debug2("populate_buffer() filled block %d with len: %d" % (self.block_nr, len(self.buffer))) self.buffer_lock.release() #@-node:class OpenGmailFile @@ -1396,21 +1447,35 @@ def populate_buffer(self, deleteAfter): class Gmailfs(Fuse): def kick_imap(self, imap): + print("kicking imap connection...") + print("disconnecting...") + self.disconnect_from_server(imap) + print("disonnected") try: - self.disconnect_from_server(imap) + sys.stderr.write("connecting to server...") + self.connect_to_server(imap) + sys.stderr.write("done\n") + except Exception, e: + print("kick connect exception: '%s'" % str(e)) except: - pass - self.connect_to_server(imap) + print("kick connect unknown exception") def disconnect_from_server(self, imap): + # these are just to be nice to the server. It + # does not matter if they succeed because the + # init below will just blow everything away. try: imap.close() + imap.logout() + imap.shutdown() except: - pass + print("shutdown exception"); try: - imap.logout() + imap.__init__("imap.gmail.com", 993) except: - pass + print("reconnect exception"); + + return #@ @+others def connect_to_server(self, imap = None): @@ -1442,23 +1507,130 @@ def connect_to_server(self, imap = None): return imap def get_imap(self): - return self.imap_pool.get() + if self.early: + return self.imap + imap = None + timeout = 1 + block = 1 + tries = 0 + + while imap == None: + tries = tries + 1 + try: + imap = self.imap_pool.get(block, timeout) + except: + if tries % 10 == 0: + print("[%d] hung on getting imap worker for %d seconds" % (thread.get_ident(), tries)) + traceback.print_stack() + return imap def put_imap(self, imap): + if self.early: + return self.imap_pool.put(imap) + def drain_nonblocking_dirty_queue(self): + src = self.dirty_objects_nonblocking + while not src.empty(): + try: + o = src.get_nowait() + except Queue.Empty: + return + self.queue_dirty_blockable(o) + + # I was getting things blocked on addition to the dirty list. I thought + # the writeout threads had died. But, I put this in and miraculously it + # started to work ok. There might be a bug in the blocking Queue.put() + # that causes it to hang when it shouldn't. This may work around it. + def queue_dirty_blockable(self, obj): + tries = 0 + timeout = 10 + success = 0 + can_block = 1 + + while success == 0: + try: + self.dirty_objects.put(obj, can_block, timeout) + success = 1 + except Queue.Full: + tries = tries + 1 + print("[%d] hung on dirty (%d long) list for %d seconds" % + (thread.get_ident(), self.dirty_objects.qsize(), tries*timeout)) + traceback.print_stack() + + def queue_dirty(self, obj, can_block = 1): + if can_block: + # take the opportunity to move the non-blocking queue + # over to the blocking one. The more often you do this + # the less chance there is for the queue to get too + # large + self.drain_nonblocking_dirty_queue() + self.queue_dirty_blockable(obj) + else: + # this one is non-blocking on put()s because it has no + # size limit + self.dirty_objects_nonblocking.put(obj) + log_debug3("end queue_dirty(%s, %d) queue size now: %d/%d" % + (obj, can_block, self.dirty_objects.qsize(), self.dirty_objects_nonblocking.qsize())) + + + def nr_dirty_objects(self): + size = self.dirty_objects.qsize() + self.dirty_objects_nonblocking.qsize() + return size + + def get_dirty_object(self): + try: + obj = self.dirty_objects.get_nowait() + log_debug3("get_dirty_object() found one in normal queue: '%s'" % (obj)) + return obj + except Queue.Empty: + pass + try: + obj = self.dirty_objects_nonblocking.get_nowait() + log_debug3("get_dirty_object() found one in nonblock queue: '%s'" % (obj)) + return obj + except Queue.Empty: + pass + log_debug3("get_dirty_object() found nothing") + return None + + def imap_get_all_uids(self, imap): + semget(imap.lock) + tmpdebug = imap.debug + imap.debug = 2 + #imap.close() + imap.select(fsNameVar) + resp, msgids = imap_uid(imap, "SEARCH", 'ALL') + print "imap_get_all_uids: resp: %s msgids: %s" % (resp, msgids) + uids = msgids[0].split() + print ("%d messages found..." % (len(uids))) + imap.lock.release() + imap.debug = tmpdebug + return uids + #@+node:__init__ def __init__(self, extraOpts, mountpoint, *args, **kw): Fuse.__init__(self, *args, **kw) + self.dirty_objects = Queue.Queue(50) + self.dirty_objects_nonblocking = Queue.Queue() + self.lookup_lock = threading.Semaphore(1) + self.inode_cache_lock = threading.Semaphore(1) - self.nr_imap_threads = 4 + self.imap = self.connect_to_server() + self.early = 1; + if "IMAPFS_FSCK" in os.environ: + self.fsck() + exit(0) + self.early = 0; + + self.nr_imap_threads = 3 + if "IMAPFS_NR_THREADS" in os.environ: + self.nr_imap_threads = int(os.environ['IMAPFS_NR_THREADS']) self.imap_pool = Queue.Queue(self.nr_imap_threads) for i in range(self.nr_imap_threads): + sys.stderr.write("connecting thread %d to server..." % (i)) self.imap_pool.put(self.connect_to_server()) - - self.dirty_objects = Queue.Queue(50) - self.lookup_lock = threading.Semaphore(1) - self.inode_cache_lock = threading.Semaphore(1) + sys.stderr.write("done\n") self.fuse_args.mountpoint = mountpoint self.fuse_args.setmod('foreground') @@ -1513,7 +1685,6 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): #04:52.69 CAPABILITIES: ('IMAP4REV1', 'UNSELECT', 'IDLE', 'NAMESPACE', 'QUOTA', 'XLIST', 'CHILDREN', 'XYZZY') #04:52.97 < * CAPABILITY IMAP4rev1 UNSELECT LITERAL+ IDLE NAMESPACE QUOTA ID XLIST CHILDREN X-GM-EXT-1 UIDPLUS COMPRESS=DEFLATE - self.imap = self.connect_to_server() # This select() can be done read-only # might be useful for implementing "mount -o ro" log_info("Connected to gmail") @@ -1534,36 +1705,209 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): trash_all = 1 if trash_all: print("deleting existing messages...") - semget(self.imap.lock) - resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') - self.imap.lock.release() - uids = msgids[0].split() - print ("%d found..." % (len(uids))) - joined_uids = string.join(msgids[0].split(), ",") - log_debug2("about to delete msgids: ->%s<-" % (joined_uids)) + uids = self.imap_get_all_uids(self.imap) if (len(uids)): imap_trash_uids(self.imap, uids) - print("done deleting %d existing messages" % (len(msgids[0].split()))) + print("done deleting %d existing messages" % (len(uids))) + print("mailbox now has %d messages" % (len(self.imap_get_all_uids(self.imap)))) semget(self.imap.lock) - resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') + expunged = self.imap.expunge() self.imap.lock.release() - print("mailbox now has %d messages" % (len(msgids[0].split()))) - self.imap.expunge() + print("mailbox expunged: %s" % str(expunged)) + print("mailbox now has %d messages" % (len(self.imap_get_all_uids(self.imap)))) - semget(self.imap.lock) - resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') self.imap.lock.release() - print("mailbox now has %d messages" % (len(msgids[0].split()))) + print("mailbox has %d messages" % (len(self.imap_get_all_uids(self.imap)))) #exit(0) #elf.mythread() + log_debug1("init looking for root inode") + path = "/" + inode = self.lookup_inode(path) + if (inode == None) and (path == '/'): + # I would eventually like to see this done in a mkfs-style command + log_info("creating root inode") + mode = S_IFDIR|S_IRUSR|S_IXUSR|S_IWUSR|S_IRGRP|S_IXGRP|S_IXOTH|S_IROTH + inode = self.mk_inode(mode, 1) + # "/" is special and gets an extra link. + # It will always appear to have an nlink of 3 + # even when it is empty + inode.i_nlink = inode.i_nlink + 1 + dirent = self.link_inode(path, inode) + #write_out(inode, "new root inode") + #write_out(dirent, "new root dirent") + log_info("root inode uids: %s %s" % (dirent.dirent_msg.uid, inode.inode_msg.uid)) + inode = self.lookup_inode(path) + if inode == None: + log_info("uh oh, can't find root inode") + exit(-1) + pass #@-node:__init__ #@+node:attribs flags = 1 + def fsck_trash_msg(self, msg): + if not "IMAPFS_FSCK_CAN_WRITE" in os.environ: + print "fsck_trash_msg() can not write, so skipping fix" + return + imap_trash_msg(self.imap, msg) + #@-node:attribs + def fsck(self): + print ("fsck()") + self.imap.select(fsNameVar) + uids = self.imap_get_all_uids(self.imap) + print ("fsck: %d messages found..." % (len(uids))) + joined_uids = string.join(uids, ",") + log_debug1("fsck found msgids: ->%s<-" % (joined_uids)) + if (len(uids) == 0): + print ("fsck: empty mailbox") + return +# def parse_inode_msg_subj(self, inode_msg): +# def parse_dirent_msg(self, msg): + # these probably aren't precise enough. What if a dirent is for a + # file called "foo_inode_msg_bar"?? + dirent_uids = _getMsguidsByQuery("get all dirents", self.imap, ['dirent_msg ']) + inode_uids = _getMsguidsByQuery("get all inodes", self.imap, ['inode_msg ']) + + for uid in dirent_uids: + #subject = msg['Subject'] + print "dirent uid: '%s'" % (uid) + for uid in inode_uids: + #subject = msg['Subject'] + print "inode_uid: '%s'" % (uid) + + dir_members = {}; + path_to_dirent = {}; + print "fetching dirent msgs..." + for msgid, msg in fetch_full_messages(self.imap, dirent_uids).items(): + dirent_parts = self.parse_dirent_msg(msg) + pathname = _pathSeparatorDecode(dirent_parts[PathNameTag]) + + dirent_parts['pathname'] = pathname + dirent_parts['msg'] = msg + + filename = dirent_parts[FileNameTag] + if not dir_members.has_key(pathname): + dir_members[pathname] = {} + directory = dir_members[pathname] + if directory.has_key(filename): + existing = directory[filename] + print "ERROR: '%s' occurs twice in dir: '%s'" % (filename, pathname) + if existing['msg'].uid > msgid: + # throw away the current message that + # we're looking at + # and forget that we ever saw it + self.fsck_trash_msg(msg) + continue + else: + # throw away the message that was there + self.fsck_trash_msg(existing['msg']) + # not stricly necessary, but clearer + directory.pop(filename) + directory[filename] = dirent_parts + # are these copy by value or reference??!?!? + dir_members[pathname] = directory + print "[%s] found in path '%s': file: '%s'" % (str(msgid), pathname, filename) + # the "/" dirent has a path of '/' and a filename: '' + if len(filename) > 0: + # the path of things under the root dir already end in / + if len(dirent_parts['pathname']) > 1: + full = dirent_parts['pathname'] + "/" + filename + else: + full = dirent_parts['pathname'] + filename + else: + full = "/" + path_to_dirent[full] = dirent_parts + + inode_refcount = {} + for full, dirent in path_to_dirent.iteritems(): + ino = dirent[RefInodeTag] + if not inode_refcount.has_key(ino): + print "creating refcount for '%s'" % (full) + inode_refcount[ino] = 1 + else: + inode_refcount[ino] = inode_refcount[ino] + 1 + print " bumping refcount for '%s' to : %d" % (full, inode_refcount[ino]) + + parent_path = dirent['pathname'] + #print "process parent: '%s' for '%s'" % (parent_path, full) + if not len(parent_path): + print "WARNING: zero-length parent: '%s' for '%s' hope it's /)" % (parent_path, full) + continue + if not path_to_dirent.has_key(parent_path): + print "ERROR: could not find parent entry '%s'" % (parent_path) + self.fsck_trash_msg(dirent['msg']) + continue + + print "second dirent pass, bumping refcounts for parent directories..." + for full, dirent in path_to_dirent.iteritems(): + parent_path = dirent['pathname'] + parent_dirent = path_to_dirent[parent_path] + parent_ino = parent_dirent[RefInodeTag] + #if full == "/": + # print "skipping refcount bump for '/', it has enough" + # continue + if not inode_refcount.has_key(parent_ino): + print "WARNING: parent: '%s' not seen until second dirent pass" % (parent_path) + inode_refcount[parent_ino] = 0 + inode_refcount[parent_ino] = inode_refcount[parent_ino] + 1 + print "bumping refcount for parent dir of '%s': '%s' to: %d" \ + % (full, parent_path, inode_refcount[parent_ino]) + + + inodes_seen = {} + print "fetching all inodes..." + for msgid, msg in fetch_full_messages(self.imap, inode_uids).items(): + + inode_parts = self.parse_inode_msg_subj(msg) + ino = inode_parts[InodeTag] + + inode_obj = GmailInode(msg, self) + mode = inode_obj.mode + if inode_obj.mode & S_IFDIR: + inode_refcount[ino] = inode_refcount[ino] + 1 + print "bumped refcount for dir ino: %d to : %d" % (ino, inode_refcount[ino]) + inode_obj = None + + log_debug2("msgid: %s has ino: %s" % (msgid, ino)) + if not inode_refcount.has_key(ino): + # FIXME: link into lost+found dir + print "ERROR: unlinked inode: '%s'" % (ino) + self.fsck_trash_msg(msg) + continue + if inodes_seen.has_key(ino): + existing = inodes_seen[ino] + print "ERROR: duplicate messages for inode: '%s'" % (str(ino)) + if existing['msg'].uid > msgid: + # throw away the current message that + # we're looking at + self.fsck_trash_msg(msg) + # and forget that we ever saw it + continue + else: + # throw away the message that was there + self.fsck_trash_msg(existing['msg']) + # not stricly necessary, but clearer + inodes_seen.pop(ino) + inode_parts['msg'] = msg + inodes_seen[ino] = inode_parts + + stored_nr_links = inode_parts[NumberLinksTag] + counted_nr_links = inode_refcount[ino] + if stored_nr_links != counted_nr_links: + print "WARNING: ino: %s claims to have %s links, but we counted %s" % (ino, stored_nr_links, counted_nr_links) + #if "IMAPFS_FSCK_CAN_WRITE" in os.environ: + # print "fixing inode link count: %s" % (str(ino)) + # inode = GmailInode(msg, self) + # inode.i_nlink = counted_nr_links + # inode.mark_dirty("fsck") + # inode.i_write_out("fsck") + continue + print "GOOD: linked inode: '%s' i_nlink: %d" % (ino, inode_refcount[ino]) + class GmailStat(fuse.Stat): def __init__(self): @@ -1597,23 +1941,7 @@ def getattr(self, path): #st_mtime (time of most recent content modification) #st_ctime (time of most recent content modification or metadata change). - log_debug3("getattr() -1") inode = self.lookup_inode(path) - log_debug3("getattr() 0") - if (inode == None) and (path == '/'): - log_info("creating root inode") - mode = S_IFDIR|S_IRUSR|S_IXUSR|S_IWUSR|S_IRGRP|S_IXGRP|S_IXOTH|S_IROTH - inode = self.mk_inode(mode, 1, 2) - dirent = self.link_inode(path, inode) - write_out(inode, "new root inode") - write_out(dirent, "new root dirent") - log_info("root inode uids: %s %s" % (dirent.dirent_msg.uid, inode.inode_msg.uid)) - inode = self.lookup_inode(path) - if inode == None: - log_info("uh oh, can't find root inode") - exit(-1) - log_debug3("getattr() 1") - if inode: log_debug3("getattr() 2") log_debug3("found inode for path: '%s'" % (path)) @@ -1657,11 +1985,14 @@ def readlink(self, path): e = OSError("Not a link"+path) e.errno = EINVAL raise e - log_debug("about to follow link in body:"+inode.msg.as_string()) - body = fixQuotedPrintable(inode.msg.as_string()) + log_debug("about to follow link in body:"+inode.inode_msg.as_string()) + body = fixQuotedPrintable(inode.inode_msg.as_string()) m = re.search(SymlinkTag+'='+LinkStartDelim+'(.*)'+ LinkEndDelim,body) - return m.group(1) + link_target = m.group(1) + link_target = _pathSeparatorDecode(link_target) + return link_target + #@-node:readlink #@+node:readdir @@ -1730,16 +2061,13 @@ def flush_dirent_cache(self): self.put_inode(dirent.inode) while 1: - try: - # no args means do not block, and trow - # exception immediately if empty - object = self.fs.dirty_objects.get() - write_out(object, "flush_dirent_cache()") - log_info("flush_dirent_cache() wrote out %s" % (object.to_str())) - except: + object = self.get_dirty_object() + if object == None: log_info("no more object to flush") break - size = self.fs.dirty_objects.qsize() + write_out(object, "flush_dirent_cache()") + log_info("flush_dirent_cache() wrote out %s" % (object.to_str())) + size = self.nr_dirty_objects() log_info("explicit flush done") #@+node:unlink @@ -1747,7 +2075,9 @@ def unlink(self, path): log_entry("unlink called on:"+path) try: dirent = self.lookup_dirent(path) - dirent.unlink() + if dirent == None: + return -EEXIST + dirent.d_unlink() return 0 except: _logException("Error unlinking file"+path) @@ -1766,7 +2096,7 @@ def rmdir(self, path): # e.errno = ENOTEMPTY # raise e dirent = self.lookup_dirent(path) - dirent.unlink() + dirent.d_unlink() # update number of links in parent directory parentdir, filename = parse_path(path) @@ -1782,7 +2112,7 @@ def rmdir(self, path): def symlink(self, oldpath, newpath): log_debug1("symlink: oldpath='%s', newpath='%s'" % (oldpath, newpath)) mode = S_IFLNK|S_IRWXU|S_IRWXG|S_IRWXO - inode = self.mk_inode(mode, 0, 1) + inode = self.mk_inode(mode, 0) inode.symlink_tgt = newpath self.link_inode(oldpath, inode) @@ -1805,7 +2135,7 @@ def format_dirent_subj(self, str): def parse_dirent_msg(self, msg): subject_re = self.format_dirent_subj('(.*)') - subject = msg['Subject'].replace("\r\n\t", " ") + subject = msg['Subject'].translate(string.maketrans('\r\n\t', ' ')) m = re.match(subject_re, subject) log_debug3("looking for regex: '%s'" % (subject_re)) log_debug3("subject: '%s'" % (subject)) @@ -1846,7 +2176,9 @@ def mk_dirent_msg(self, path, inode_nr_ref): inode_nr_ref, fsNameVar, GMAILFS_VERSION) - return mkmsg(subject, body) + msg = mkmsg(subject, body) + log_debug1("mk_dirent_msg('%s', 'ino=%s') done" % (path, str(inode_nr_ref))) + return msg def parse_inode_msg_subj(self, inode_msg): subject = inode_msg['Subject'].replace('\u003d','=') @@ -1877,13 +2209,13 @@ def rename(self, path_src, path_dst): dst_dirent = self.lookup_dirent(path_dst) if not dst_dirent == None: - dst_dirent.unlink() + dst_dirent.d_unlink() # ensure the inode does not go away between # when we unlink and relink it inode = self.get_inode(src_dirent.inode.ino) # do the unlink first, because otherwise we # will get two dirents at the same path - src_dirent.unlink() + src_dirent.d_unlink() self.link_inode(path_dst, inode) self.put_inode(inode) @@ -1990,7 +2322,7 @@ def mknod(self, path, mode, dev): """ Python has no os.mknod, so we can only do some things """ log_entry("mknod('%s')" % (path)) if S_ISREG(mode) | S_ISFIFO(mode) | S_ISSOCK(mode): - inode = self.mk_inode(mode, 0, 1) + inode = self.mk_inode(mode, 0) self.link_inode(path, inode) # update parent dir?? #open(path, "w") @@ -1999,33 +2331,46 @@ def mknod(self, path, mode, dev): #@-node:mknod def mk_dirent(self, inode, path): + # this should keep us from racing with lookup_dirent() + semget(self.lookup_lock) if self.dirent_cache.has_key(path): log_debug("dirent cache hit on path: '%s'" % (path)) return self.dirent_cache[path] - # this should keep us from racing with lookup_dirent() - semget(self.lookup_lock) filename, dir = parse_path(path) msg = self.mk_dirent_msg(path, inode.ino) + log_debug1("mk_dirent_msg(%s) done" % path) dirent = GmailDirent(msg, inode, self) + log_debug1("GmailDirent(%s) done" % path) dirent.mark_dirty("mk_dirent") + log_debug1("mark_dirty '%s' done" % path) if len(self.dirent_cache) > 1000: self.flush_dirent_cache() + log_debug1("cache flush '%s' done" % path) log_debug1("added dirent to cache for path: '%s'" % (dirent.path())) self.dirent_cache[dirent.path()] = dirent self.lookup_lock.release() + log_debug1("mk_dirent('%s') lock released" % path) return dirent - def mk_inode(self, mode, size, nlink=1): + def mk_inode(self, mode, size): inode = GmailInode(None, self) inode.mode = int(mode) inode.size = int(size) - inode.i_nlink = int(nlink) + inode.i_nlink = 0 inode.mark_dirty("new inode") self.inode_cache[inode.ino] = inode return inode def link_inode(self, path, inode): dirent = self.mk_dirent(inode, path) + inode.i_nlink = inode.i_nlink + 1 + inode.mark_dirty("link_inode()") + + parentdir, name = parse_path(path) + log_debug1("mkdir() parentdir: '%s' name: '%s'" % (parentdir, name)) + parentdirinode = self.lookup_inode(parentdir) + parentdirinode.i_nlink += 1 + parentdirinode.mark_dirty("link_inode() for parent dir") return dirent def lookup_inode(self, path): @@ -2040,12 +2385,10 @@ def mkdir(self, path, mode): log_entry("mkdir('%s', %o)" % (path, mode)) if (self.lookup_dirent(path) != None): return -EEXIST - inode = self.mk_inode(mode|S_IFDIR, 1, 2) + inode = self.mk_inode(mode|S_IFDIR, 1) + # extra link for for '.' + inode.i_nlink = inode.i_nlink + 1 self.link_inode(path, inode) - parentdir, name = parse_path(path) - parentdirinode = self.lookup_inode(parentdir) - parentdirinode.i_nlink += 1 - parentdirinode.mark_dirty("mkdir") #@-node:mkdir #@+node:utime @@ -2082,7 +2425,6 @@ def open(self, path, flags): #@+node:read def read(self, path, readlen, offset): - log_entry("read") try: log_debug1("gmailfs.py:Gmailfs:read(len=%d, offset=%d, path='%s')" % (readlen, offset, path)) @@ -2198,6 +2540,9 @@ def statfs(self): st.f_bavail = blocks_avail st.f_files = files st.f_ffree = files_free + if "IMAPFS_FSCK_ON_STATFS" in os.environ: + print "now fscking" + self.fsck() return st #@-node:statfs @@ -2210,6 +2555,9 @@ def fsync(self, path, isfsyncfile): write_out(inode, "fsync_inode") #for block in inode._blocks: # write_out(block, "fsync_blocks") + if "IMAPFS_FSCK_ON_FLUSH" in os.environ: + print "now fscking" + self.fsck() return 0 #@-node:fsync @@ -2217,7 +2565,10 @@ def fsync(self, path, isfsyncfile): def fsyncdir(self, path, isfsyncfile): log_entry("gmailfs.py:Gmailfs:fsyncdir: path=%s, isfsyncfile=%s" % (path, isfsyncfile)) log_info("gmailfs.py:Gmailfs:fsyncdir: path=%s, isfsyncfile=%s" % (path, isfsyncfile)) - return -ENOSYS + if "IMAPFS_FSCK_ON_FLUSH" in os.environ: + print "now fscking" + self.fsck() + return -ENOSYS #@-node:fsync @@ -2227,6 +2578,12 @@ def flush(self, path): dirent = self.lookup_dirent(path) #write_out(dirent, "flush") #write_out(dirent.inode, "flush") + while self.nr_dirty_objects() > 0: + print "flush: there are still %d dirty objects, sleeping..." % (self.nr_dirty_objects()) + time.sleep(1) + if "IMAPFS_FSCK_ON_FLUSH" in os.environ: + print "now fscking" + self.fsck() return 0 #@-node:fsync @@ -2355,6 +2712,13 @@ def prefetch_dirent_msgs(self, dir): return dirent_msgs_by_iref def lookup_dirent(self, path): + if (len(path) > PATHNAME_MAX): + e = OSError("Pathname too long:"+path) + e.errno = ENAMETOOLONG + print("ENAMETOOLONG") + traceback.print_stack() + raise e + dir, filename = parse_path(path) # This cache checking is required at this point. There # are inodes in the cache that have not been written to @@ -2386,7 +2750,10 @@ def lookup_dirent(self, path): continue new_dirent = GmailDirent(dirent_msg, inode, self) log_debug2("cached dirent: '%s'" % (new_dirent.path())) - self.dirent_cache[new_dirent.path()] = new_dirent + if self.dirent_cache.has_key(new_dirent.path()): + new_dirent = self.dirent_cache[new_dirent.path()] + else: + self.dirent_cache[new_dirent.path()] = new_dirent if new_dirent.path() == path: log_debug2("lookup_dirent() dirent: '%s'" % (new_dirent.path())) ret_dirent = new_dirent