From 84cf78fb763ef203ee7c34dbc502941a2afe7327 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 8 Jan 2011 08:12:43 -0800 Subject: [PATCH 01/13] rework exception code (and a few other things) --- gmailfs.py | 62 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 33b484f..0c6a32c 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -325,16 +325,17 @@ def imap_uid(imap, cmd, arg1, arg2 = None, arg3 = None, arg4 = None): tries = tries - 1 try: ret = imap.uid(cmd, arg1, arg2, arg3) + if not ret == None: + return ret; except Exception, e: log_error("imap.uid() error: %s (tries left: %d)" % (str(e), tries)) - imap.fs.kick_imap(imap) if tries <= 0: raise except: log_error("imap.uid() unknown error: (tries left: %d)" % (tries)) - imap.fs.kick_imap(imap) if tries <= 0: raise + imap.fs.kick_imap(imap) return ret def __imap_append(imap, fsNameVar, flags, now, msg): @@ -355,6 +356,16 @@ def __imap_append(imap, fsNameVar, flags, now, msg): imap.fs.kick_imap(imap) if tries <= 0: raise + except Exception, e: + log_error("imap.append() exception: %s" % (str(e))) + imap.fs.kick_imap(imap) + if tries <= 0: + raise + except: + log_error("imap.append() unknown error: (tries left: %d)" % (tries)) + imap.fs.kick_imap(imap) + if tries <= 0: + raise return rsp, data def imap_getquotaroot(imap, fsNameVar): @@ -685,7 +696,7 @@ def _getMsguidsByQuery(about, imap, queries, or_query = 0): except: log_error("IMAP error on SEARCH") log_error("queryString: ->%s<-" % (queryString)) - print "\nIMAP exception ", sys.exc_info()[0] + print "\nIMAP exception, exiting", sys.exc_info()[0] exit(-1) finally: imap.lock.release() @@ -925,9 +936,13 @@ def unlink(self): to_trash.append(str(self.dirent_msg.uid)) if len(to_trash): imap_trash_uids(self.fs.imap, to_trash) + semget(self.fs.lookup_lock) deleted = self.fs.dirent_cache.pop(self.path()) if deleted != None and deleted != self: - log_error("removed wrong dirent 
from cache") + log_error("[%s] removed wrong dirent from cache self: %s" % (str(thread.get_ident()), str(self))) + log_error("\tmy path: '%s' uid: '%s' obj: %s" % (self.path(), str(self.dirent_msg.uid), str(self))) + log_error("\tdl path: '%s' uid: '%s' obj: %s" % (deleted.path(), str(deleted.dirent_msg.uid), str(deleted))) + self.fs.lookup_lock.release() #@-node:class GmailDirent @@ -1396,21 +1411,33 @@ def populate_buffer(self, deleteAfter): class Gmailfs(Fuse): def kick_imap(self, imap): + print("kicking imap connection...") + print("disconnecting...") + self.disconnect_from_server(imap) + print("disonnected") try: - self.disconnect_from_server(imap) + self.connect_to_server(imap) + except Exception, e: + print("kick connect exception: '%s'" % str(e)) except: - pass - self.connect_to_server(imap) + print("kick connect unknown exception") def disconnect_from_server(self, imap): + # these are just to be nice to the server. It + # does not matter if they succeed because the + # init below will just blow everything away. 
try: imap.close() + imap.logout() + imap.shutdown() except: - pass + print("shutdown exception"); try: - imap.logout() + imap.__init__("imap.gmail.com", 993) except: - pass + print("reconnect exception"); + + return #@ @+others def connect_to_server(self, imap = None): @@ -1418,6 +1445,7 @@ def connect_to_server(self, imap = None): global password global username + print("connect_to_server()...") fsNameVar = DefaultFsname password = DefaultPassword username = DefaultUsername @@ -1439,6 +1467,7 @@ def connect_to_server(self, imap = None): resp, data = imap.select(fsNameVar) log_debug1("select2 '%s' resp: '%s' data: '%s'" % (fsNameVar, resp, data)) return + print("connect_to_server() done") return imap def get_imap(self): @@ -1451,7 +1480,7 @@ def put_imap(self, imap): def __init__(self, extraOpts, mountpoint, *args, **kw): Fuse.__init__(self, *args, **kw) - self.nr_imap_threads = 4 + self.nr_imap_threads = 3 self.imap_pool = Queue.Queue(self.nr_imap_threads) for i in range(self.nr_imap_threads): self.imap_pool.put(self.connect_to_server()) @@ -1747,6 +1776,8 @@ def unlink(self, path): log_entry("unlink called on:"+path) try: dirent = self.lookup_dirent(path) + if dirent == None: + return -EEXIST dirent.unlink() return 0 except: @@ -1999,11 +2030,11 @@ def mknod(self, path, mode, dev): #@-node:mknod def mk_dirent(self, inode, path): + # this should keep us from racing with lookup_dirent() + semget(self.lookup_lock) if self.dirent_cache.has_key(path): log_debug("dirent cache hit on path: '%s'" % (path)) return self.dirent_cache[path] - # this should keep us from racing with lookup_dirent() - semget(self.lookup_lock) filename, dir = parse_path(path) msg = self.mk_dirent_msg(path, inode.ino) dirent = GmailDirent(msg, inode, self) @@ -2386,7 +2417,10 @@ def lookup_dirent(self, path): continue new_dirent = GmailDirent(dirent_msg, inode, self) log_debug2("cached dirent: '%s'" % (new_dirent.path())) - self.dirent_cache[new_dirent.path()] = new_dirent + if 
self.dirent_cache.has_key(new_dirent.path()): + new_dirent = self.dirent_cache[new_dirent.path()] + else: + self.dirent_cache[new_dirent.path()] = new_dirent if new_dirent.path() == path: log_debug2("lookup_dirent() dirent: '%s'" % (new_dirent.path())) ret_dirent = new_dirent From 3431b36bc77a2f4b1be7d8fb29607b2f6aad4c3e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 8 Jan 2011 11:26:42 -0800 Subject: [PATCH 02/13] fix up quota exceptions and make the exception handling a bit more concise --- gmailfs.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 0c6a32c..4a7d436 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -351,35 +351,25 @@ def __imap_append(imap, fsNameVar, flags, now, msg): time.sleep(1) rsp = None continue - except RuntimeError, e: - log_error("imap.append() error: %s" % (str(e))) - imap.fs.kick_imap(imap) - if tries <= 0: - raise - except Exception, e: - log_error("imap.append() exception: %s" % (str(e))) - imap.fs.kick_imap(imap) - if tries <= 0: - raise except: - log_error("imap.append() unknown error: (tries left: %d)" % (tries)) + log_error("imap.append() exception: '%s' (tries left: %d)" % (sys.exc_info()[0], tries)) imap.fs.kick_imap(imap) if tries <= 0: raise return rsp, data def imap_getquotaroot(imap, fsNameVar): - tries = 2 + tries = 3 ret = None while ret == None: + tries = tries - 1 try: ret = imap.getquotaroot(fsNameVar) - except RuntimeError, e: - log_error("imap.getquotaroot() error: %s" % (str(e))) + except: + log_error("imap.getquotaroot() error: %s" % sys.exc_info()[0]) imap.fs.kick_imap(imap) if tries <= 0: raise - tries = tries - 1 return ret # The IMAP uid commands can take multiple uids and return From 9cccf764c613712974ae60dfc829916faaab3426 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 8 Jan 2011 12:23:00 -0800 Subject: [PATCH 03/13] fix some more exception handling --- gmailfs.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 
insertions(+), 12 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 4a7d436..b0508a5 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -254,12 +254,19 @@ def log_warning(str): return def parse_path(path): - # should we check that there's always a / in the path?? - ind = string.rindex(path, '/') - parent_dir = path[:ind] - filename = path[ind+1:] + try: + # rindex excepts when there's no / + ind = string.rindex(path, '/') + parent_dir = path[:ind] + filename = path[ind+1:] + except: + print("parse_path() exception") + ind = 0 + parent_dir = "" + filename = path if len(parent_dir) == 0: parent_dir = "/" + log_debug4("parse_path('%s') parent_dir: '%s', filename: '%s'" % (path, parent_dir, filename)) return parent_dir, filename @@ -330,11 +337,11 @@ def imap_uid(imap, cmd, arg1, arg2 = None, arg3 = None, arg4 = None): except Exception, e: log_error("imap.uid() error: %s (tries left: %d)" % (str(e), tries)) if tries <= 0: - raise + abort() except: log_error("imap.uid() unknown error: (tries left: %d)" % (tries)) if tries <= 0: - raise + abort() imap.fs.kick_imap(imap) return ret @@ -353,9 +360,9 @@ def __imap_append(imap, fsNameVar, flags, now, msg): continue except: log_error("imap.append() exception: '%s' (tries left: %d)" % (sys.exc_info()[0], tries)) - imap.fs.kick_imap(imap) if tries <= 0: - raise + abort() + imap.fs.kick_imap(imap) return rsp, data def imap_getquotaroot(imap, fsNameVar): @@ -369,7 +376,7 @@ def imap_getquotaroot(imap, fsNameVar): log_error("imap.getquotaroot() error: %s" % sys.exc_info()[0]) imap.fs.kick_imap(imap) if tries <= 0: - raise + abort() return ret # The IMAP uid commands can take multiple uids and return @@ -1008,7 +1015,7 @@ def i_write_out(self, desc): value = self.xattr[attr] payload_name = 'xattr-'+attr log_debug1("adding xattr payload named '%s': '%s'" % (payload_name, value)) - msg_add_payload(msg, value, payload_name) + msg_add_payload(self.inode_msg, value, payload_name) # remember where this is in case we have to delete it 
i_orig_uid = self.inode_msg.uid # because this wipes it out @@ -1676,8 +1683,8 @@ def readlink(self, path): e = OSError("Not a link"+path) e.errno = EINVAL raise e - log_debug("about to follow link in body:"+inode.msg.as_string()) - body = fixQuotedPrintable(inode.msg.as_string()) + log_debug("about to follow link in body:"+inode.inode_msg.as_string()) + body = fixQuotedPrintable(inode.inode_msg.as_string()) m = re.search(SymlinkTag+'='+LinkStartDelim+'(.*)'+ LinkEndDelim,body) return m.group(1) @@ -2064,6 +2071,7 @@ def mkdir(self, path, mode): inode = self.mk_inode(mode|S_IFDIR, 1, 2) self.link_inode(path, inode) parentdir, name = parse_path(path) + print("mkdir() parentdir: '%s' name: '%s'" % (parentdir, name)) parentdirinode = self.lookup_inode(parentdir) parentdirinode.i_nlink += 1 parentdirinode.mark_dirty("mkdir") From 082a7bef8564410ee1dac2b851259af29af17f4b Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 8 Jan 2011 12:23:35 -0800 Subject: [PATCH 04/13] fix readlink() path decoding I encoded the path, but didn't decode it, so if there was a / in the link: lrwxrwxrwx 1 dave dave 0 2011-01-08 11:52 fstest_2ae632ecf36446af4c385b196f944404 -> __fs__fstest_0282f55576d57179fef3f9336df52e1c__fs__fstest_1512bf64248c80b21afc9911cc5a4bca --- gmailfs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gmailfs.py b/gmailfs.py index b0508a5..5c694a7 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -1687,7 +1687,10 @@ def readlink(self, path): body = fixQuotedPrintable(inode.inode_msg.as_string()) m = re.search(SymlinkTag+'='+LinkStartDelim+'(.*)'+ LinkEndDelim,body) - return m.group(1) + link_target = m.group(1) + link_target = _pathSeparatorDecode(link_target) + return link_target + #@-node:readlink #@+node:readdir From c7e60d7b701d2807bfce72917238e0ac43af28fa Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 11 Jan 2011 11:01:35 -0800 Subject: [PATCH 05/13] moving along, added fsck I could be a lot better about these changelogs. 
This adds some debugging for things that hung during dirtying operations --- gmailfs.py | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 175 insertions(+), 5 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 5c694a7..c76f78d 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -120,6 +120,7 @@ UserConfigFile = abspath(expanduser("~/.gmailfs.conf")) GMAILFS_VERSION = '5' +PATHNAME_MAX = 256 PathStartDelim = '__a__' PathEndDelim = '__b__' @@ -867,12 +868,32 @@ def clear_dirty(self, nr): log_info("cleared original dirty reason: '%s'" % (orig_reason)) return msg + # I was getting things blocked on addition to the dirty list. I thought + # the writeout threads had died. But, I put this in and miraculously it + # started to work ok. There might be a bug in the blocking Queue.put() + # that causes it to hang when it shouldn't. This may work around it. + def put_on_global_list(self): + tries = 0 + should_block = 1 + timeout = 10 + success = 0 + + while success == 0: + try: + self.fs.dirty_objects.put(self, should_block, timeout) + success = 1 + except Queue.Full: + tries = tries + 1 + print("[%d] hung on dirty (%d long) list for %d seconds" % + (thread.get_ident(), self.fs.dirty_objects.qsize(), tries*timeout)) + traceback.print_stack() + def mark_dirty(self, desc): self.__dirty = desc self.dirty_reasons.put(desc) try: - self.dirty_mark.put_nowait(desc); - self.fs.dirty_objects.put(self) + self.dirty_mark.put_nowait(desc) + self.put_on_global_list() except: log_debug("mark_dirty('%s') skipped global list, already dirty" % (self.to_str())) log_debug1("mark_dirty('%s') because '%s' (%d reasons)" % @@ -1468,7 +1489,20 @@ def connect_to_server(self, imap = None): return imap def get_imap(self): - return self.imap_pool.get() + imap = None + timeout = 1 + block = 1 + tries = 0 + + while imap == None: + tries = tries + 1 + try: + imap = self.imap_pool.get(block, timeout) + except: + if tries % 10 == 0: + print("[%d] hung on getting imap worker for %d 
seconds" % (thread.get_ident(), tries)) + traceback.print_stack() + return imap def put_imap(self, imap): self.imap_pool.put(imap) @@ -1477,6 +1511,11 @@ def put_imap(self, imap): def __init__(self, extraOpts, mountpoint, *args, **kw): Fuse.__init__(self, *args, **kw) + self.imap = self.connect_to_server() + if "IMAPFS_FSCK" in os.environ: + self.fsck() + exit(0) + self.nr_imap_threads = 3 self.imap_pool = Queue.Queue(self.nr_imap_threads) for i in range(self.nr_imap_threads): @@ -1539,7 +1578,6 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): #04:52.69 CAPABILITIES: ('IMAP4REV1', 'UNSELECT', 'IDLE', 'NAMESPACE', 'QUOTA', 'XLIST', 'CHILDREN', 'XYZZY') #04:52.97 < * CAPABILITY IMAP4rev1 UNSELECT LITERAL+ IDLE NAMESPACE QUOTA ID XLIST CHILDREN X-GM-EXT-1 UIDPLUS COMPRESS=DEFLATE - self.imap = self.connect_to_server() # This select() can be done read-only # might be useful for implementing "mount -o ro" log_info("Connected to gmail") @@ -1590,6 +1628,122 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): flags = 1 #@-node:attribs + def fsck(self): + + print ("fsck()") + semget(self.imap.lock) + resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') + self.imap.lock.release() + uids = msgids[0].split() + print ("%d messages found..." % (len(uids))) + joined_uids = string.join(msgids[0].split(), ",") + log_debug2("about to delete msgids: ->%s<-" % (joined_uids)) + if (len(uids) == 0): + print ("empty mailbox") + return +# def parse_inode_msg_subj(self, inode_msg): +# def parse_dirent_msg(self, msg): + # these probably aren't precise enough. What if a dirent is for a + # file called "foo_inode_msg_bar"?? 
+ dirent_uids = _getMsguidsByQuery("get all dirents", self.imap, ['dirent_msg ']) + inode_uids = _getMsguidsByQuery("get all inodes", self.imap, ['inode_msg ']) + + for uid in dirent_uids: + #subject = msg['Subject'] + print "dirent uid: '%s'" % (uid) + for uid in inode_uids: + #subject = msg['Subject'] + print "inode_uid: '%s'" % (uid) + + all_dirs = {}; + for msgid, msg in fetch_full_messages(self.imap, dirent_uids).items(): + dirent_parts = self.parse_dirent_msg(msg) + pathname = _pathSeparatorDecode(dirent_parts[PathNameTag]) + + dirent_parts['pathname'] = pathname + dirent_parts['msg'] = msg + + filename = dirent_parts[FileNameTag] + if not all_dirs.has_key(pathname): + all_dirs[pathname] = {} + directory = all_dirs[pathname] + if directory.has_key(filename): + existing = directory[filename] + print "ERROR: '%s' occurs twice in dir: '%s'" % (filename, pathname) + if existing['msg'].uid > msgid: + # throw away the current message that + # we're looking at + imap_trash_msg(self.imap, msg) + # and forget that we ever saw it + continue + else: + # throw away the message that was there + imap_trash_msg(self.imap, existing['msg']) + # not stricly necessary, but clearer + directory.pop(filename) + directory[filename] = dirent_parts + # are these copy by value or reference??!?!? 
+ all_dirs[pathname] = directory + print "[%s] set '%s': '%s'" % (str(msgid), pathname, filename) + + inode_refcount = {} + for dirname, dirents in all_dirs.iteritems(): + for fname, dirent in dirents.iteritems(): + full = dirent['pathname'] + "/" + fname + grandparent_path, parent_name = parse_path(dirent['pathname']) + print "grandparent: '%s' parent: '%s' f: '%s'" % (grandparent_path, parent_name, fname) + if not all_dirs.has_key(grandparent_path): + print "ERROR: could not find grandparent dir '%s' for '%s'" % (grandparent_path, full) + imap_trash_msg(self.imap, dirent['msg']) + continue + grandparent = all_dirs[grandparent_path] + if not grandparent.has_key(parent_name): + print "ERROR: could not find parent entry '%s' in dir '%s'" % (parent_name, grandparent_path) + imap_trash_msg(self.imap, dirent['msg']) + continue + ino = dirent[RefInodeTag] + if not inode_refcount.has_key(ino): + inode_refcount[ino] = 0 + inode_refcount[ino] = inode_refcount[ino] + 1 + + inodes_seen = {} + print "fetching all inodes..." 
+ for msgid, msg in fetch_full_messages(self.imap, inode_uids).items(): + inode_parts = self.parse_inode_msg_subj(msg) + ino = inode_parts[InodeTag] + + if not inode_refcount.has_key(ino): + # FIXME: link into lost+found dir + print "ERROR: unlinked inode: '%s'" % (ino) + imap_trash_msg(self.imap, msg) + continue + if inodes_seen.has_key(ino): + existing = inodes_seen[ino] + print "ERROR: duplicate messages for inode: '%s'" % (str(ino)) + if existing['msg'].uid > msgid: + # throw away the current message that + # we're looking at + imap_trash_msg(self.imap, msg) + # and forget that we ever saw it + continue + else: + # throw away the message that was there + imap_trash_msg(self.imap, existing['msg']) + # not stricly necessary, but clearer + inodes_seen.pop(ino) + inode_parts['msg'] = msg + inodes_seen[ino] = inode_parts + + stored_nr_links = inode_parts[NumberLinksTag] + counted_nr_links = inode_refcount[ino] + if stored_nr_links != counted_nr_links: + print "WARNING: ino: %s claims to have %s links, but we counted %s" % (ino, stored_nr_links, counted_nr_links) + continue + print "GOOD: linked inode: '%s' i_nlink: %d" % (ino, inode_refcount[ino]) + + + + class GmailStat(fuse.Stat): def __init__(self): @@ -1877,7 +2031,9 @@ def mk_dirent_msg(self, path, inode_nr_ref): inode_nr_ref, fsNameVar, GMAILFS_VERSION) - return mkmsg(subject, body) + msg = mkmsg(subject, body) + log_debug1("mk_dirent_msg('%s', 'ino=%s') done" % (path, str(inode_nr_ref))) + return msg def parse_inode_msg_subj(self, inode_msg): subject = inode_msg['Subject'].replace('\u003d','=') @@ -2037,13 +2193,18 @@ def mk_dirent(self, inode, path): return self.dirent_cache[path] filename, dir = parse_path(path) msg = self.mk_dirent_msg(path, inode.ino) + log_debug1("mk_dirent_msg(%s) done" % path) dirent = GmailDirent(msg, inode, self) + log_debug1("GmailDirent(%s) done" % path) dirent.mark_dirty("mk_dirent") + log_debug1("mark_dirty '%s' done" % path) if len(self.dirent_cache) > 1000: 
self.flush_dirent_cache() + log_debug1("cache flush '%s' done" % path) log_debug1("added dirent to cache for path: '%s'" % (dirent.path())) self.dirent_cache[dirent.path()] = dirent self.lookup_lock.release() + log_debug1("mk_dirent('%s') lock released" % path) return dirent def mk_inode(self, mode, size, nlink=1): @@ -2057,6 +2218,8 @@ def mk_inode(self, mode, size, nlink=1): def link_inode(self, path, inode): dirent = self.mk_dirent(inode, path) + inode.i_nlink = inode.i_nlink + 1 + inode.mark_dirty("link_inode()") return dirent def lookup_inode(self, path): @@ -2387,6 +2550,13 @@ def prefetch_dirent_msgs(self, dir): return dirent_msgs_by_iref def lookup_dirent(self, path): + if (len(path) > PATHNAME_MAX): + e = OSError("Pathname too long:"+path) + e.errno = ENAMETOOLONG + print("ENAMETOOLONG") + traceback.print_stack() + raise e + dir, filename = parse_path(path) # This cache checking is required at this point. There # are inodes in the cache that have not been written to From 77f7cad5357874baf01941fb6f8cd81cc9e602fc Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 11 Jan 2011 11:05:18 -0800 Subject: [PATCH 06/13] reduce stdout verbosity of block writeout --- gmailfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gmailfs.py b/gmailfs.py index c76f78d..452b9da 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -1344,7 +1344,7 @@ def b_write_out(self, desc): self.buffer_lock.release() if self.inode.size / self.block_size == self.block_nr: part = self.inode.size % self.block_size - print("on last block, so only writing out %d/%d bytes of block" % (part, len(buf))) + log_debug2("on last block, so only writing out %d/%d bytes of block" % (part, len(buf))) buf = buf[:part] arr = array.array('c') From ede7ba20ed6f0650fd2e6e388bd27a475709979f Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 11 Jan 2011 12:31:18 -0800 Subject: [PATCH 07/13] rework dirty writeout code The basic problem is that we can't block dirty writeout by marking an object 
dirty. So, we have to make a way to remember dirty objects in a non-blocking way. So, we create a non-size-limited Queue for these, then tie that queue back in to the existing one that _can_ block. --- gmailfs.py | 130 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 41 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 452b9da..5dcd137 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -554,11 +554,10 @@ def __init__ (self, fs, nr): self.nr = nr def write_out_object(self): - try: - # block, and timeout after 1 second - object = self.fs.dirty_objects.get(1, 1) - except: - # effectively success if we timeout + object = self.fs.get_dirty_object() + if object == None: + # the queues are empty, so all is good + time.sleep(1) return 0 # we do not want to sit here sleeping on objects # so if we can not get the lock, move on to another @@ -868,36 +867,16 @@ def clear_dirty(self, nr): log_info("cleared original dirty reason: '%s'" % (orig_reason)) return msg - # I was getting things blocked on addition to the dirty list. I thought - # the writeout threads had died. But, I put this in and miraculously it - # started to work ok. There might be a bug in the blocking Queue.put() - # that causes it to hang when it shouldn't. This may work around it. 
- def put_on_global_list(self): - tries = 0 - should_block = 1 - timeout = 10 - success = 0 - - while success == 0: - try: - self.fs.dirty_objects.put(self, should_block, timeout) - success = 1 - except Queue.Full: - tries = tries + 1 - print("[%d] hung on dirty (%d long) list for %d seconds" % - (thread.get_ident(), self.fs.dirty_objects.qsize(), tries*timeout)) - traceback.print_stack() - - def mark_dirty(self, desc): + def mark_dirty(self, desc, can_block = 1): self.__dirty = desc self.dirty_reasons.put(desc) try: self.dirty_mark.put_nowait(desc) - self.put_on_global_list() + self.fs.queue_dirty(self, can_block) except: log_debug("mark_dirty('%s') skipped global list, already dirty" % (self.to_str())) - log_debug1("mark_dirty('%s') because '%s' (%d reasons)" % - (self.to_str(), desc, self.dirty_reasons.qsize())) + log_debug1("mark_dirty('%s') because '%s' (%d reasons, %d total)" % + (self.to_str(), desc, self.dirty_reasons.qsize(), self.fs.nr_dirty_objects())) def to_str(self): return "Dirtyable.to_str()" @@ -1024,10 +1003,10 @@ def __init__(self, inode_msg, fs): def to_str(self): return "inode(%s)" % (str(self.ino)) - def mark_dirty(self, desc): + def mark_dirty(self, desc, can_block = 1): log_debug2("inode mark_dirty(%s) size: '%s'" % (desc, str(self.size))) self.mtime = int(time.time()) - Dirtyable.mark_dirty(self, desc) + Dirtyable.mark_dirty(self, desc, can_block) def i_write_out(self, desc): log_debug2("i_write_out() self: '%s'" % (self)) @@ -1361,7 +1340,12 @@ def b_write_out(self, desc): log_debug("b_write_out() finished, rsp: '%s'" % str(msgid)) if msgid > 0: log_debug("Sent write commit ok") - self.inode.mark_dirty("commit data block") + # This is a special case. This b_write_out() happens in a worker thread, + # and if we block it waiting on dirty data to be written out, we may end + # up deadlocking. So, put the inode on a dirty list, but do not block + # doing it. 
+ can_block = 0 + self.inode.mark_dirty("commit data block", can_block) tmpf.close() ret = 0 else: @@ -1507,6 +1491,70 @@ def get_imap(self): def put_imap(self, imap): self.imap_pool.put(imap) + def drain_nonblocking_dirty_queue(self): + src = self.dirty_objects_nonblocking + while not src.empty(): + try: + o = src.get_nowait() + except Queue.Empty: + return + self.queue_dirty_blockable(o) + + # I was getting things blocked on addition to the dirty list. I thought + # the writeout threads had died. But, I put this in and miraculously it + # started to work ok. There might be a bug in the blocking Queue.put() + # that causes it to hang when it shouldn't. This may work around it. + def queue_dirty_blockable(self, obj): + tries = 0 + timeout = 10 + success = 0 + can_block = 1 + + while success == 0: + try: + self.dirty_objects.put(obj, can_block, timeout) + success = 1 + except Queue.Full: + tries = tries + 1 + print("[%d] hung on dirty (%d long) list for %d seconds" % + (thread.get_ident(), self.dirty_objects.qsize(), tries*timeout)) + traceback.print_stack() + + def queue_dirty(self, obj, can_block = 1): + if can_block: + # take the opportunity to move the non-blocking queue + # over to the blocking one. 
The more often you do this + # the less chance there is for the queue to get too + # large + self.drain_nonblocking_dirty_queue() + self.queue_dirty_blockable(obj) + else: + # this one is non-blocking on put()s because it has no + # size limit + self.dirty_objects_nonblocking.put(obj) + print "end queue_dirty(%s, %d) queue size now: %d/%d" % (obj, can_block, self.nr_dirty_objects.qsize(), self.dirty_objects_nonblocking.qsize()) + + + def nr_dirty_objects(self): + size = self.dirty_objects.qsize() + self.dirty_objects_nonblocking.qsize() + return size + + def get_dirty_object(self): + try: + obj = self.dirty_objects.get_nowait() + log_debug3("get_dirty_object() found one in normal queue: '%s'" % (obj)) + return obj + except Queue.Empty: + pass + try: + obj = self.dirty_objects_nonblocking.get_nowait() + log_debug3("get_dirty_object() found one in nonblock queue: '%s'" % (obj)) + return obj + except Queue.Empty: + pass + log_debug3("get_dirty_object() found nothing") + return None + #@+node:__init__ def __init__(self, extraOpts, mountpoint, *args, **kw): Fuse.__init__(self, *args, **kw) @@ -1517,11 +1565,14 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): exit(0) self.nr_imap_threads = 3 + if "IMAPFS_NR_THREADS" in os.environ: + self.nr_imap_threads = os.environ['IMAPFS_NR_THREADS'] self.imap_pool = Queue.Queue(self.nr_imap_threads) for i in range(self.nr_imap_threads): self.imap_pool.put(self.connect_to_server()) self.dirty_objects = Queue.Queue(50) + self.dirty_objects_nonblocking = Queue.Queue() self.lookup_lock = threading.Semaphore(1) self.inode_cache_lock = threading.Semaphore(1) @@ -1913,16 +1964,13 @@ def flush_dirent_cache(self): self.put_inode(dirent.inode) while 1: - try: - # no args means do not block, and trow - # exception immediately if empty - object = self.fs.dirty_objects.get() - write_out(object, "flush_dirent_cache()") - log_info("flush_dirent_cache() wrote out %s" % (object.to_str())) - except: + object = self.get_dirty_object() + if 
object == None: log_info("no more object to flush") break - size = self.fs.dirty_objects.qsize() + write_out(object, "flush_dirent_cache()") + log_info("flush_dirent_cache() wrote out %s" % (object.to_str())) + size = self.fs.nr_dirty_objects() log_info("explicit flush done") #@+node:unlink @@ -2237,7 +2285,7 @@ def mkdir(self, path, mode): inode = self.mk_inode(mode|S_IFDIR, 1, 2) self.link_inode(path, inode) parentdir, name = parse_path(path) - print("mkdir() parentdir: '%s' name: '%s'" % (parentdir, name)) + log_debug1("mkdir() parentdir: '%s' name: '%s'" % (parentdir, name)) parentdirinode = self.lookup_inode(parentdir) parentdirinode.i_nlink += 1 parentdirinode.mark_dirty("mkdir") From 920216c984b315b150534980356e2f9f93c3c4d9 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 13 Jan 2011 10:42:38 -0800 Subject: [PATCH 08/13] lots of junk I need to be better about committing these things in smaller pieces, I know. I promise I'll get better if/when I start getting more patches. :) --- gmailfs.py | 166 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 65 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 5dcd137..354c69b 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -161,6 +161,8 @@ rsp_cache = {} debug = 1 +if "DEBUG" in os.environ: + debug = int(os.environ['DEBUG']) if debug >= 3: imaplib.Debug = 3 #imaplib.Debug = 4 @@ -563,14 +565,17 @@ def write_out_object(self): # so if we can not get the lock, move on to another # object got_lock = object.writeout_lock.acquire(0) + log_debug3("write out got_lock: '%s' obj dirty: %s" % (str(got_lock), str(object.dirty()))) if not got_lock: - self.fs.dirty_objects.put(object) + dont_block = 1 + self.fs.queue_dirty(object, dont_block) return -1 sem_msg[object.writeout_lock] = "acquired write_out_object()" reason = Dirtyable.dirty_reason(object) start = time.time() ret = write_out_nolock(object, "bdflushd") end = time.time() + log_debug3("write out about to releaselock") 
object.writeout_lock.release() sem_msg[object.writeout_lock] += " released write_out_object()" size = self.fs.dirty_objects.qsize() @@ -788,14 +793,14 @@ def getSingleMessageByQuery(desc, imap, q): return fetch_full_message(imap, msgid) def _pathSeparatorEncode(path): - s1 = re.sub("/","__fs__",path) - s2 = re.sub("-","__mi__",s1) - return re.sub("\+","__pl__",s2) + #s1 = re.sub("/","__fs__",path) + #s2 = re.sub("-","__mi__",s1) + return re.sub("\+","__pl__",path) def _pathSeparatorDecode(path): - s1 = re.sub("__fs__","/",path) - s2 = re.sub("__mi__","-",s1) - return re.sub("__pl__","+",s2) + #s1 = re.sub("__fs__","/",path) + #s2 = re.sub("__mi__","-",s1) + return re.sub("__pl__","+",path) def _logException(msg): @@ -918,7 +923,7 @@ def d_write_out(self, desc): raise e return 0 - def unlink(self): + def d_unlink(self): # FIXME, don't allow directory unlinking when children log_debug1("unlink path:"+self.path()+" with nlinks:"+str(self.inode.i_nlink)) if self.inode.mode & S_IFDIR: @@ -932,8 +937,15 @@ def unlink(self): to_trash = self.inode.dec_nlink() to_trash.append(str(self.dirent_msg.uid)) if len(to_trash): + for uid in to_trash: + log_debug1("unlink() going to trash uid: %s" % (uid)) imap_trash_uids(self.fs.imap, to_trash) semget(self.fs.lookup_lock) + # this ensures that the (now dead) dentry will never get written out + while (self.dirty() > 0): + dirty_token = self.dirty() + print "d_unlink() dirty token: '%s'" % (dirty_token) + self.clear_dirty(dirty_token) deleted = self.fs.dirent_cache.pop(self.path()) if deleted != None and deleted != self: log_error("[%s] removed wrong dirent from cache self: %s" % (str(thread.get_ident()), str(self))) @@ -1016,6 +1028,7 @@ def i_write_out(self, desc): payload_name = 'xattr-'+attr log_debug1("adding xattr payload named '%s': '%s'" % (payload_name, value)) msg_add_payload(self.inode_msg, value, payload_name) + log_debug3("i_write_out() self.dirty: '%s' desc: '%s'" % (Dirtyable.dirty_reason(self), desc)) # remember where 
this is in case we have to delete it i_orig_uid = self.inode_msg.uid # because this wipes it out @@ -1088,10 +1101,18 @@ def dec_nlink(self): return [] log_debug2("truncating inode") subject = 'b='+str(self.ino)+'' + # either wait until it is fully written out + got_lock = self.writeout_lock.acquire() + # or make sure that it never is + while (self.dirty() > 0): + dirty_token = self.dirty() + self.clear_dirty(dirty_token) + block_uids = _getMsguidsByQuery("unlink blocks", self.fs.imap, [subject]) to_trash = [] to_trash.extend(block_uids) to_trash.append(str(self.inode_msg.uid)) + self.writeout_lock.release() return to_trash def fill_from_inode_msg(self): @@ -1418,7 +1439,9 @@ def kick_imap(self, imap): self.disconnect_from_server(imap) print("disonnected") try: + sys.stderr.write("connecting to server..." % (i)) self.connect_to_server(imap) + sys.stderr.write("done\n") except Exception, e: print("kick connect exception: '%s'" % str(e)) except: @@ -1447,7 +1470,6 @@ def connect_to_server(self, imap = None): global password global username - print("connect_to_server()...") fsNameVar = DefaultFsname password = DefaultPassword username = DefaultUsername @@ -1469,7 +1491,6 @@ def connect_to_server(self, imap = None): resp, data = imap.select(fsNameVar) log_debug1("select2 '%s' resp: '%s' data: '%s'" % (fsNameVar, resp, data)) return - print("connect_to_server() done") return imap def get_imap(self): @@ -1532,7 +1553,8 @@ def queue_dirty(self, obj, can_block = 1): # this one is non-blocking on put()s because it has no # size limit self.dirty_objects_nonblocking.put(obj) - print "end queue_dirty(%s, %d) queue size now: %d/%d" % (obj, can_block, self.nr_dirty_objects.qsize(), self.dirty_objects_nonblocking.qsize()) + log_debug3("end queue_dirty(%s, %d) queue size now: %d/%d" % + (obj, can_block, self.dirty_objects.qsize(), self.dirty_objects_nonblocking.qsize())) def nr_dirty_objects(self): @@ -1569,7 +1591,9 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): 
self.nr_imap_threads = os.environ['IMAPFS_NR_THREADS'] self.imap_pool = Queue.Queue(self.nr_imap_threads) for i in range(self.nr_imap_threads): + sys.stderr.write("connecting thread %d to server..." % (i)) self.imap_pool.put(self.connect_to_server()) + sys.stderr.write("done\n") self.dirty_objects = Queue.Queue(50) self.dirty_objects_nonblocking = Queue.Queue() @@ -1672,6 +1696,23 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): #exit(0) #elf.mythread() + log_debug1("init looking for root inode") + path = "/" + inode = self.lookup_inode(path) + if (inode == None) and (path == '/'): + log_info("creating root inode") + mode = S_IFDIR|S_IRUSR|S_IXUSR|S_IWUSR|S_IRGRP|S_IXGRP|S_IXOTH|S_IROTH + inode = self.mk_inode(mode, 1) + inode.i_nlink = inode.i_nlink + 1 + dirent = self.link_inode(path, inode) + write_out(inode, "new root inode") + write_out(dirent, "new root dirent") + log_info("root inode uids: %s %s" % (dirent.dirent_msg.uid, inode.inode_msg.uid)) + inode = self.lookup_inode(path) + if inode == None: + log_info("uh oh, can't find root inode") + exit(-1) + pass #@-node:__init__ @@ -1706,7 +1747,9 @@ def fsck(self): #subject = msg['Subject'] print "inode_uid: '%s'" % (uid) - all_dirs = {}; + dir_members = {}; + path_to_dirent = {}; + print "fetching dirent msgs..." 
for msgid, msg in fetch_full_messages(self.imap, dirent_uids).items(): dirent_parts = self.parse_dirent_msg(msg) pathname = _pathSeparatorDecode(dirent_parts[PathNameTag]) @@ -1715,58 +1758,65 @@ def fsck(self): dirent_parts['msg'] = msg filename = dirent_parts[FileNameTag] - if not all_dirs.has_key(pathname): - all_dirs[pathname] = {} - directory = all_dirs[pathname] + if not dir_members.has_key(pathname): + dir_members[pathname] = {} + directory = dir_members[pathname] if directory.has_key(filename): existing = directory[filename] print "ERROR: '%s' occurs twice in dir: '%s'" % (filename, pathname) if existing['msg'].uid > msgid: # throw away the current message that # we're looking at - imap_trash_msg(self.imap, msg) + ##imap_trash_msg(self.imap, msg) # and forget that we ever saw it continue else: # throw away the message that was there - imap_trash_msg(self.imap, existing['msg']) + ##imap_trash_msg(self.imap, existing['msg']) # not stricly necessary, but clearer directory.pop(filename) directory[filename] = dirent_parts # are these copy by value or reference??!?!? 
- all_dirs[pathname] = directory - print "[%s] set '%s': '%s'" % (str(msgid), pathname, filename) + dir_members[pathname] = directory + print "[%s] found in path '%s': file: '%s'" % (str(msgid), pathname, filename) + # the if is to handle "/" + if len(filename) > 0: + full = dirent_parts['pathname'] + filename + else: + full = "/" + path_to_dirent[full] = dirent_parts inode_refcount = {} - for dirname, dirents in all_dirs.iteritems(): - for fname, dirent in dirents.iteritems(): - full = dirent['pathname'] + "/" + fname - grandparent_path, parent_name = parse_path(dirent['pathname']) - print "grandparent: '%s' parent: '%s' f: '%s'" % (grandparent_path, parent_name, fname) - if not all_dirs.has_key(grandparent_path): - print "ERROR: could not find grandparent dir '%s' for '%s'" % (grandparent_path, full) - imap_trash_msg(self.imap, dirent['msg']) - continue - grandparent = all_dirs[grandparent_path] - if not grandparent.has_key(parent_name): - print "ERROR: could not find parent entry '%s' in dir '%s'" % (parent_name, grandparent_path) - imap_trash_msg(self.imap, dirent['msg']) - continue - ino = dirent[RefInodeTag] - if not inode_refcount.has_key(ino): - inode_refcount[ino] = 0 + for full, dirent in path_to_dirent.iteritems(): + ino = dirent[RefInodeTag] + if not inode_refcount.has_key(ino): + inode_refcount[ino] = 0 + inode_refcount[ino] = inode_refcount[ino] + 1 + # Is it a directory? + if dir_members.has_key(full): inode_refcount[ino] = inode_refcount[ino] + 1 + parent_path = dirent['pathname'] + print "process parent: '%s' for '%s'" % (parent_path, full) + if not len(parent_path): + print "WARNING: zero-length parent: '%s' for '%s' hope it's /)" % (parent_path, full) + continue + if not path_to_dirent.has_key(parent_path): + print "ERROR: could not find parent entry '%s'" % (parent_path) + ##imap_trash_msg(self.imap, dirent['msg']) + continue + inodes_seen = {} print "fetching all inodes..." 
for msgid, msg in fetch_full_messages(self.imap, inode_uids).items(): inode_parts = self.parse_inode_msg_subj(msg) ino = inode_parts[InodeTag] + log_debug2("msgid: %s has ino: %s" % (msgid, ino)) if not inode_refcount.has_key(ino): # FIXME: link into lost+found dir print "ERROR: unlinked inode: '%s'" % (ino) - imap_trash_msg(self.imap, msg) + ##imap_trash_msg(self.imap, msg) continue if inodes_seen.has_key(ino): existing = inodes_seen[ino] @@ -1774,12 +1824,12 @@ def fsck(self): if existing['msg'].uid > msgid: # throw away the current message that # we're looking at - imap_trash_msg(self.imap, msg) + ##imap_trash_msg(self.imap, msg) # and forget that we ever saw it continue else: # throw away the message that was there - imap_trash_msg(self.imap, existing['msg']) + ##imap_trash_msg(self.imap, existing['msg']) # not stricly necessary, but clearer inodes_seen.pop(ino) inode_parts['msg'] = msg @@ -1828,23 +1878,7 @@ def getattr(self, path): #st_mtime (time of most recent content modification) #st_ctime (time of most recent content modification or metadata change). 
- log_debug3("getattr() -1") inode = self.lookup_inode(path) - log_debug3("getattr() 0") - if (inode == None) and (path == '/'): - log_info("creating root inode") - mode = S_IFDIR|S_IRUSR|S_IXUSR|S_IWUSR|S_IRGRP|S_IXGRP|S_IXOTH|S_IROTH - inode = self.mk_inode(mode, 1, 2) - dirent = self.link_inode(path, inode) - write_out(inode, "new root inode") - write_out(dirent, "new root dirent") - log_info("root inode uids: %s %s" % (dirent.dirent_msg.uid, inode.inode_msg.uid)) - inode = self.lookup_inode(path) - if inode == None: - log_info("uh oh, can't find root inode") - exit(-1) - log_debug3("getattr() 1") - if inode: log_debug3("getattr() 2") log_debug3("found inode for path: '%s'" % (path)) @@ -1970,7 +2004,7 @@ def flush_dirent_cache(self): break write_out(object, "flush_dirent_cache()") log_info("flush_dirent_cache() wrote out %s" % (object.to_str())) - size = self.fs.nr_dirty_objects() + size = self.nr_dirty_objects() log_info("explicit flush done") #@+node:unlink @@ -1980,7 +2014,7 @@ def unlink(self, path): dirent = self.lookup_dirent(path) if dirent == None: return -EEXIST - dirent.unlink() + dirent.d_unlink() return 0 except: _logException("Error unlinking file"+path) @@ -1999,7 +2033,7 @@ def rmdir(self, path): # e.errno = ENOTEMPTY # raise e dirent = self.lookup_dirent(path) - dirent.unlink() + dirent.d_unlink() # update number of links in parent directory parentdir, filename = parse_path(path) @@ -2015,7 +2049,7 @@ def rmdir(self, path): def symlink(self, oldpath, newpath): log_debug1("symlink: oldpath='%s', newpath='%s'" % (oldpath, newpath)) mode = S_IFLNK|S_IRWXU|S_IRWXG|S_IRWXO - inode = self.mk_inode(mode, 0, 1) + inode = self.mk_inode(mode, 0) inode.symlink_tgt = newpath self.link_inode(oldpath, inode) @@ -2112,13 +2146,13 @@ def rename(self, path_src, path_dst): dst_dirent = self.lookup_dirent(path_dst) if not dst_dirent == None: - dst_dirent.unlink() + dst_dirent.d_unlink() # ensure the inode does not go away between # when we unlink and relink it 
inode = self.get_inode(src_dirent.inode.ino) # do the unlink first, because otherwise we # will get two dirents at the same path - src_dirent.unlink() + src_dirent.d_unlink() self.link_inode(path_dst, inode) self.put_inode(inode) @@ -2225,7 +2259,7 @@ def mknod(self, path, mode, dev): """ Python has no os.mknod, so we can only do some things """ log_entry("mknod('%s')" % (path)) if S_ISREG(mode) | S_ISFIFO(mode) | S_ISSOCK(mode): - inode = self.mk_inode(mode, 0, 1) + inode = self.mk_inode(mode, 0) self.link_inode(path, inode) # update parent dir?? #open(path, "w") @@ -2255,11 +2289,11 @@ def mk_dirent(self, inode, path): log_debug1("mk_dirent('%s') lock released" % path) return dirent - def mk_inode(self, mode, size, nlink=1): + def mk_inode(self, mode, size): inode = GmailInode(None, self) inode.mode = int(mode) inode.size = int(size) - inode.i_nlink = int(nlink) + inode.i_nlink = 0 inode.mark_dirty("new inode") self.inode_cache[inode.ino] = inode return inode @@ -2282,7 +2316,9 @@ def mkdir(self, path, mode): log_entry("mkdir('%s', %o)" % (path, mode)) if (self.lookup_dirent(path) != None): return -EEXIST - inode = self.mk_inode(mode|S_IFDIR, 1, 2) + inode = self.mk_inode(mode|S_IFDIR, 1) + # extra link for for '.' 
+ inode.i_nlink = inode.i_nlink + 1 self.link_inode(path, inode) parentdir, name = parse_path(path) log_debug1("mkdir() parentdir: '%s' name: '%s'" % (parentdir, name)) From d249a3efb3f5a8150f0530f22f66cdf7cbe5b447 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 26 Feb 2011 16:43:49 -0800 Subject: [PATCH 09/13] more fsck improvements --- gmailfs.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 354c69b..3da6862 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -1703,7 +1703,10 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): log_info("creating root inode") mode = S_IFDIR|S_IRUSR|S_IXUSR|S_IWUSR|S_IRGRP|S_IXGRP|S_IXOTH|S_IROTH inode = self.mk_inode(mode, 1) - inode.i_nlink = inode.i_nlink + 1 + # "/" is special and gets an extra link. + # It will always appear to have an nlink of 3 + # even when it is empty + inode.i_nlink = inode.i_nlink + 2 dirent = self.link_inode(path, inode) write_out(inode, "new root inode") write_out(dirent, "new root dirent") @@ -1779,9 +1782,13 @@ def fsck(self): # are these copy by value or reference??!?!? dir_members[pathname] = directory print "[%s] found in path '%s': file: '%s'" % (str(msgid), pathname, filename) - # the if is to handle "/" + # the "/" dirent has a path of '/' and a filename: '' if len(filename) > 0: - full = dirent_parts['pathname'] + filename + # the path of things under the root dir already end in / + if len(dirent_parts['pathname']) > 1: + full = dirent_parts['pathname'] + "/" + filename + else: + full = dirent_parts['pathname'] + filename else: full = "/" path_to_dirent[full] = dirent_parts @@ -1790,14 +1797,14 @@ def fsck(self): for full, dirent in path_to_dirent.iteritems(): ino = dirent[RefInodeTag] if not inode_refcount.has_key(ino): - inode_refcount[ino] = 0 - inode_refcount[ino] = inode_refcount[ino] + 1 - # Is it a directory? 
- if dir_members.has_key(full): + print "creating refcount for '%s'" % (full) + inode_refcount[ino] = 1 + else: inode_refcount[ino] = inode_refcount[ino] + 1 + print " bumping refcount for '%s' to : %d" % (full, inode_refcount[ino]) parent_path = dirent['pathname'] - print "process parent: '%s' for '%s'" % (parent_path, full) + #print "process parent: '%s' for '%s'" % (parent_path, full) if not len(parent_path): print "WARNING: zero-length parent: '%s' for '%s' hope it's /)" % (parent_path, full) continue @@ -1806,12 +1813,36 @@ def fsck(self): ##imap_trash_msg(self.imap, dirent['msg']) continue + print "second dirent pass, bumping refcounts for parent directories..." + for full, dirent in path_to_dirent.iteritems(): + parent_path = dirent['pathname'] + parent_dirent = path_to_dirent[parent_path] + parent_ino = parent_dirent[RefInodeTag] + if full == "/": + print "skipping refcount bump for '/', it has enough" + continue + if not inode_refcount.has_key(parent_ino): + print "WARNING: parent: '%s' not seen until second dirent pass" % (parent_path) + inode_refcount[parent_ino] = 0 + inode_refcount[parent_ino] = inode_refcount[parent_ino] + 1 + print "bumping refcount for parent dir of '%s': '%s' to: %d" \ + % (full, parent_path, inode_refcount[parent_ino]) + + inodes_seen = {} print "fetching all inodes..." 
for msgid, msg in fetch_full_messages(self.imap, inode_uids).items(): + inode_parts = self.parse_inode_msg_subj(msg) ino = inode_parts[InodeTag] + inode_obj = GmailInode(msg, self) + mode = inode_obj.mode + if inode_obj.mode & S_IFDIR: + inode_refcount[ino] = inode_refcount[ino] + 1 + print "bumped refcount for dir ino: %d to : %d" % (ino, inode_refcount[ino]) + inode_obj = None + log_debug2("msgid: %s has ino: %s" % (msgid, ino)) if not inode_refcount.has_key(ino): # FIXME: link into lost+found dir @@ -2506,6 +2537,13 @@ def flush(self, path): dirent = self.lookup_dirent(path) #write_out(dirent, "flush") #write_out(dirent.inode, "flush") + while self.nr_dirty_objects() > 0: + print "there are still dirty objects, sleeping..." + time.sleep(1) + #print "sleeping before fsck" + #time.sleep(5) + #print "now fscking" + #self.fsck() return 0 #@-node:fsync From 614b3b283fd3a50bcef33526f6809992a89d6090 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 28 Feb 2011 15:07:39 -0800 Subject: [PATCH 10/13] fix inadvertent block message deletion the read() code used to delete messages after it read them. Whoops. 
- self.populate_buffer(1) + self.populate_buffer(KEEP_AFTER_READ) --- gmailfs.py | 67 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 3da6862..4cdca62 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -122,6 +122,9 @@ GMAILFS_VERSION = '5' PATHNAME_MAX = 256 +DELETE_AFTER_READ = 1 +KEEP_AFTER_READ = 0 + PathStartDelim = '__a__' PathEndDelim = '__b__' FileStartDelim = '__c__' @@ -1255,7 +1258,7 @@ def __init__(self, inode, block_nr): self.start_offset = self.block_nr * self.block_size self.end_offset = self.start_offset + self.block_size self.ts = time.time() - log_debug1("created new block: %d" % (self.block_nr)) + log_debug1("created new GmailBlock: %d for inode: %d" % (self.block_nr, inode.ino)) gmail_blocks[self] = self def to_str(self): @@ -1298,7 +1301,7 @@ def write(self, buf, off): self.buffer = list(" "*self.block_size) self.buffer_lock.release() else: - self.populate_buffer(1) + self.populate_buffer(DELETE_AFTER_READ) buf_write_start = file_off - self.start_offset buf_write_end = buf_write_start + len(buf_part) @@ -1318,7 +1321,7 @@ def write(self, buf, off): self.buffer_lock.release() log_debug1("wrote block range: [%d:%d]" % (buf_write_start, buf_write_end)) - log_debug1("block write() setting dirty") + log_debug1("block write() setting GmailBlock dirty") self.mark_dirty("file write") if file_off + len(buf_part) > self.inode.size: @@ -1379,7 +1382,7 @@ def read(self, readlen, file_off): readlen = min(self.inode.size - file_off, readlen) log_debug1("read block: %d" % (self.block_nr)) - self.populate_buffer(1) + self.populate_buffer(KEEP_AFTER_READ) start_offset = max(file_off, self.start_offset) end_offset = min(file_off + readlen, self.end_offset) start_offset -= self.start_offset @@ -1403,7 +1406,7 @@ def populate_buffer(self, deleteAfter): q2 = 'x='+str(self.block_nr) msg = getSingleMessageByQuery("block read", self.inode.fs.imap, [ q1, q2 ]) if msg == None: - 
log_debug2("readFromGmail(): file has no blocks, returning empty contents (%s %s)" % (q1, q2)) + log_debug1("readFromGmail(): file has no blocks, returning empty contents (%s %s)" % (q1, q2)) self.buffer = list(" "*self.block_size) self.buffer_lock.release() return @@ -1420,7 +1423,8 @@ def populate_buffer(self, deleteAfter): log_debug3("after loop, a: '%s'" % str(a)) a = list(a) - if deleteAfter: + if deleteAfter == DELETE_AFTER_READ: + log_debug1("populate_buffer() deleting msg: '%s'" % (msg.uid)); imap_trash_msg(self.inode.fs.imap, msg) contentList = list(" "*self.block_size) contentList[0:] = a @@ -1439,7 +1443,7 @@ def kick_imap(self, imap): self.disconnect_from_server(imap) print("disonnected") try: - sys.stderr.write("connecting to server..." % (i)) + sys.stderr.write("connecting to server...") self.connect_to_server(imap) sys.stderr.write("done\n") except Exception, e: @@ -1494,6 +1498,8 @@ def connect_to_server(self, imap = None): return imap def get_imap(self): + if self.early: + return self.imap imap = None timeout = 1 block = 1 @@ -1510,6 +1516,8 @@ def get_imap(self): return imap def put_imap(self, imap): + if self.early: + return self.imap_pool.put(imap) def drain_nonblocking_dirty_queue(self): @@ -1580,11 +1588,17 @@ def get_dirty_object(self): #@+node:__init__ def __init__(self, extraOpts, mountpoint, *args, **kw): Fuse.__init__(self, *args, **kw) + self.dirty_objects = Queue.Queue(50) + self.dirty_objects_nonblocking = Queue.Queue() + self.lookup_lock = threading.Semaphore(1) + self.inode_cache_lock = threading.Semaphore(1) self.imap = self.connect_to_server() + self.early = 1; if "IMAPFS_FSCK" in os.environ: self.fsck() exit(0) + self.early = 0; self.nr_imap_threads = 3 if "IMAPFS_NR_THREADS" in os.environ: @@ -1595,11 +1609,6 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): self.imap_pool.put(self.connect_to_server()) sys.stderr.write("done\n") - self.dirty_objects = Queue.Queue(50) - self.dirty_objects_nonblocking = Queue.Queue() - 
self.lookup_lock = threading.Semaphore(1) - self.inode_cache_lock = threading.Semaphore(1) - self.fuse_args.mountpoint = mountpoint self.fuse_args.setmod('foreground') self.optdict = extraOpts @@ -1722,6 +1731,11 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): #@+node:attribs flags = 1 + def fsck_trash_msg(self, msg): + if not "IMAPFS_FSCK_CAN_WRITE" in os.environ: + return + imap_trash_msg(self.imap, msg) + #@-node:attribs def fsck(self): @@ -1770,12 +1784,12 @@ def fsck(self): if existing['msg'].uid > msgid: # throw away the current message that # we're looking at - ##imap_trash_msg(self.imap, msg) # and forget that we ever saw it + self.fsck_trash_msg(msg) continue else: # throw away the message that was there - ##imap_trash_msg(self.imap, existing['msg']) + self.fsck_trash_msg(existing['msg']) # not stricly necessary, but clearer directory.pop(filename) directory[filename] = dirent_parts @@ -1810,7 +1824,7 @@ def fsck(self): continue if not path_to_dirent.has_key(parent_path): print "ERROR: could not find parent entry '%s'" % (parent_path) - ##imap_trash_msg(self.imap, dirent['msg']) + self.fsck_trash_msg(dirent['msg']) continue print "second dirent pass, bumping refcounts for parent directories..." 
@@ -1847,7 +1861,7 @@ def fsck(self): if not inode_refcount.has_key(ino): # FIXME: link into lost+found dir print "ERROR: unlinked inode: '%s'" % (ino) - ##imap_trash_msg(self.imap, msg) + self.fsck_trash_msg(msg) continue if inodes_seen.has_key(ino): existing = inodes_seen[ino] @@ -1855,12 +1869,12 @@ def fsck(self): if existing['msg'].uid > msgid: # throw away the current message that # we're looking at - ##imap_trash_msg(self.imap, msg) + self.fsck_trash_msg(msg) # and forget that we ever saw it continue else: # throw away the message that was there - ##imap_trash_msg(self.imap, existing['msg']) + self.fsck_trash_msg(existing['msg']) # not stricly necessary, but clearer inodes_seen.pop(ino) inode_parts['msg'] = msg @@ -1870,13 +1884,16 @@ def fsck(self): counted_nr_links = inode_refcount[ino] if stored_nr_links != counted_nr_links: print "WARNING: ino: %s claims to have %s links, but we counted %s" % (ino, stored_nr_links, counted_nr_links) + if "IMAPFS_FSCK_CAN_WRITE" in os.environ: + print "fixing inode link count: %s" % (str(ino)) + inode = GmailInode(msg, self) + inode.i_nlink = counted_nr_links + inode.mark_dirty("fsck") + inode.i_write_out("fsck") continue print "GOOD: linked inode: '%s' i_nlink: %d" % (ino, inode_refcount[ino]) - - - class GmailStat(fuse.Stat): def __init__(self): self.st_mode = 0 @@ -2392,7 +2409,6 @@ def open(self, path, flags): #@+node:read def read(self, path, readlen, offset): - log_entry("read") try: log_debug1("gmailfs.py:Gmailfs:read(len=%d, offset=%d, path='%s')" % (readlen, offset, path)) @@ -2540,10 +2556,9 @@ def flush(self, path): while self.nr_dirty_objects() > 0: print "there are still dirty objects, sleeping..." 
 time.sleep(1) - #print "sleeping before fsck" - #time.sleep(5) - #print "now fscking" - #self.fsck() + if "IMAPFS_FSCK_ON_FLUSH" in os.environ: + print "now fscking" + self.fsck() return 0 #@-node:fsync From 2c92d8a31ed20a7b84e6cd23c77f4cfc353e5d23 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 28 Feb 2011 17:04:56 -0800 Subject: [PATCH 11/13] fix nlink tracking There was a bug both in directory creation and another in unlink() that caused nlink screwups. Those are fixed. This also continues to improve the new fsck code. --- gmailfs.py | 115 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 45 deletions(-) diff --git a/gmailfs.py b/gmailfs.py index 4cdca62..4360b44 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -393,6 +393,11 @@ def imap_getquotaroot(imap, fsNameVar): # # does python have a ... operator like c preprocessor? def uid_cmd(imap, cmd, uids, arg1, arg2 = None, arg3 = None): + # there's something funky going on with gmail. It seems to not synchronize + # between different IMAP connections. You might ask for all the messages + # in two threads and get different responses. Running imap.select() seems + # to synchronize it again. 
+ imap.select(fsNameVar) semget(imap.lock) ret = __uid_cmd(imap, cmd, uids, arg1, arg2, arg3) imap.lock.release() @@ -956,6 +961,10 @@ def d_unlink(self): log_error("\tdl path: '%s' uid: '%s' obj: %s" % (deleted.path(), str(deleted.dirent_msg.uid), str(deleted))) self.fs.lookup_lock.release() + parentdir, name = parse_path(self.path()) + parentdirinode = self.fs.lookup_inode(parentdir) + parentdirinode.i_nlink -= 1 + parentdirinode.mark_dirty("d_unlink() for parent dir") #@-node:class GmailDirent @@ -1400,13 +1409,13 @@ def populate_buffer(self, deleteAfter): if len(self.buffer): self.buffer_lock.release() return - log_debug1("populate_buffer() filling block %d because len: %d" % (self.block_nr, len(self.buffer))) + log_debug2("populate_buffer() filling block %d because len: %d" % (self.block_nr, len(self.buffer))) q1 = 'b='+str(self.inode.ino) q2 = 'x='+str(self.block_nr) msg = getSingleMessageByQuery("block read", self.inode.fs.imap, [ q1, q2 ]) if msg == None: - log_debug1("readFromGmail(): file has no blocks, returning empty contents (%s %s)" % (q1, q2)) + log_debug2("readFromGmail(): file has no blocks, returning empty contents (%s %s)" % (q1, q2)) self.buffer = list(" "*self.block_size) self.buffer_lock.release() return @@ -1429,7 +1438,7 @@ def populate_buffer(self, deleteAfter): contentList = list(" "*self.block_size) contentList[0:] = a self.buffer = contentList - print("populate_buffer() filled block %d with len: %d" % (self.block_nr, len(self.buffer))) + log_debug2("populate_buffer() filled block %d with len: %d" % (self.block_nr, len(self.buffer))) self.buffer_lock.release() #@-node:class OpenGmailFile @@ -1585,6 +1594,20 @@ def get_dirty_object(self): log_debug3("get_dirty_object() found nothing") return None + def imap_get_all_uids(self, imap): + semget(imap.lock) + tmpdebug = imap.debug + imap.debug = 2 + #imap.close() + imap.select(fsNameVar) + resp, msgids = imap_uid(imap, "SEARCH", 'ALL') + print "imap_get_all_uids: resp: %s msgids: %s" % (resp, 
msgids) + uids = msgids[0].split() + print ("%d messages found..." % (len(uids))) + imap.lock.release() + imap.debug = tmpdebug + return uids + #@+node:__init__ def __init__(self, extraOpts, mountpoint, *args, **kw): Fuse.__init__(self, *args, **kw) @@ -1602,7 +1625,7 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): self.nr_imap_threads = 3 if "IMAPFS_NR_THREADS" in os.environ: - self.nr_imap_threads = os.environ['IMAPFS_NR_THREADS'] + self.nr_imap_threads = int(os.environ['IMAPFS_NR_THREADS']) self.imap_pool = Queue.Queue(self.nr_imap_threads) for i in range(self.nr_imap_threads): sys.stderr.write("connecting thread %d to server..." % (i)) @@ -1682,26 +1705,19 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): trash_all = 1 if trash_all: print("deleting existing messages...") - semget(self.imap.lock) - resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') - self.imap.lock.release() - uids = msgids[0].split() - print ("%d found..." % (len(uids))) - joined_uids = string.join(msgids[0].split(), ",") - log_debug2("about to delete msgids: ->%s<-" % (joined_uids)) + uids = self.imap_get_all_uids(self.imap) if (len(uids)): imap_trash_uids(self.imap, uids) - print("done deleting %d existing messages" % (len(msgids[0].split()))) + print("done deleting %d existing messages" % (len(uids))) + print("mailbox now has %d messages" % (len(self.imap_get_all_uids(self.imap)))) semget(self.imap.lock) - resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') + expunged = self.imap.expunge() self.imap.lock.release() - print("mailbox now has %d messages" % (len(msgids[0].split()))) - self.imap.expunge() + print("mailbox expunged: %s" % str(expunged)) + print("mailbox now has %d messages" % (len(self.imap_get_all_uids(self.imap)))) - semget(self.imap.lock) - resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') self.imap.lock.release() - print("mailbox now has %d messages" % (len(msgids[0].split()))) + print("mailbox has %d messages" % (len(self.imap_get_all_uids(self.imap)))) 
#exit(0) #elf.mythread() @@ -1709,16 +1725,17 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): path = "/" inode = self.lookup_inode(path) if (inode == None) and (path == '/'): + # I would eventually like to see this done in a mkfs-style command log_info("creating root inode") mode = S_IFDIR|S_IRUSR|S_IXUSR|S_IWUSR|S_IRGRP|S_IXGRP|S_IXOTH|S_IROTH inode = self.mk_inode(mode, 1) # "/" is special and gets an extra link. # It will always appear to have an nlink of 3 # even when it is empty - inode.i_nlink = inode.i_nlink + 2 + inode.i_nlink = inode.i_nlink + 1 dirent = self.link_inode(path, inode) - write_out(inode, "new root inode") - write_out(dirent, "new root dirent") + #write_out(inode, "new root inode") + #write_out(dirent, "new root dirent") log_info("root inode uids: %s %s" % (dirent.dirent_msg.uid, inode.inode_msg.uid)) inode = self.lookup_inode(path) if inode == None: @@ -1733,22 +1750,20 @@ def __init__(self, extraOpts, mountpoint, *args, **kw): def fsck_trash_msg(self, msg): if not "IMAPFS_FSCK_CAN_WRITE" in os.environ: + print "fsck_trash_msg() can not write, so skipping fix" return imap_trash_msg(self.imap, msg) #@-node:attribs def fsck(self): - print ("fsck()") - semget(self.imap.lock) - resp, msgids = imap_uid(self.imap, "SEARCH", 'ALL') - self.imap.lock.release() - uids = msgids[0].split() - print ("%d messages found..." % (len(uids))) - joined_uids = string.join(msgids[0].split(), ",") - log_debug2("about to delete msgids: ->%s<-" % (joined_uids)) + self.imap.select(fsNameVar) + uids = self.imap_get_all_uids(self.imap) + print ("fsck: %d messages found..." 
% (len(uids))) + joined_uids = string.join(uids, ",") + log_debug1("fsck found msgids: ->%s<-" % (joined_uids)) if (len(uids) == 0): - print ("empty mailbox") + print ("fsck: empty mailbox") return # def parse_inode_msg_subj(self, inode_msg): # def parse_dirent_msg(self, msg): @@ -1832,9 +1847,9 @@ def fsck(self): parent_path = dirent['pathname'] parent_dirent = path_to_dirent[parent_path] parent_ino = parent_dirent[RefInodeTag] - if full == "/": - print "skipping refcount bump for '/', it has enough" - continue + #if full == "/": + # print "skipping refcount bump for '/', it has enough" + # continue if not inode_refcount.has_key(parent_ino): print "WARNING: parent: '%s' not seen until second dirent pass" % (parent_path) inode_refcount[parent_ino] = 0 @@ -1884,12 +1899,12 @@ def fsck(self): counted_nr_links = inode_refcount[ino] if stored_nr_links != counted_nr_links: print "WARNING: ino: %s claims to have %s links, but we counted %s" % (ino, stored_nr_links, counted_nr_links) - if "IMAPFS_FSCK_CAN_WRITE" in os.environ: - print "fixing inode link count: %s" % (str(ino)) - inode = GmailInode(msg, self) - inode.i_nlink = counted_nr_links - inode.mark_dirty("fsck") - inode.i_write_out("fsck") + #if "IMAPFS_FSCK_CAN_WRITE" in os.environ: + # print "fixing inode link count: %s" % (str(ino)) + # inode = GmailInode(msg, self) + # inode.i_nlink = counted_nr_links + # inode.mark_dirty("fsck") + # inode.i_write_out("fsck") continue print "GOOD: linked inode: '%s' i_nlink: %d" % (ino, inode_refcount[ino]) @@ -2350,6 +2365,12 @@ def link_inode(self, path, inode): dirent = self.mk_dirent(inode, path) inode.i_nlink = inode.i_nlink + 1 inode.mark_dirty("link_inode()") + + parentdir, name = parse_path(path) + log_debug1("mkdir() parentdir: '%s' name: '%s'" % (parentdir, name)) + parentdirinode = self.lookup_inode(parentdir) + parentdirinode.i_nlink += 1 + parentdirinode.mark_dirty("link_inode() for parent dir") return dirent def lookup_inode(self, path): @@ -2368,11 +2389,6 @@ def 
mkdir(self, path, mode): # extra link for for '.' inode.i_nlink = inode.i_nlink + 1 self.link_inode(path, inode) - parentdir, name = parse_path(path) - log_debug1("mkdir() parentdir: '%s' name: '%s'" % (parentdir, name)) - parentdirinode = self.lookup_inode(parentdir) - parentdirinode.i_nlink += 1 - parentdirinode.mark_dirty("mkdir") #@-node:mkdir #@+node:utime @@ -2524,6 +2540,9 @@ def statfs(self): st.f_bavail = blocks_avail st.f_files = files st.f_ffree = files_free + if "IMAPFS_FSCK_ON_STATFS" in os.environ: + print "now fscking" + self.fsck() return st #@-node:statfs @@ -2536,6 +2555,9 @@ def fsync(self, path, isfsyncfile): write_out(inode, "fsync_inode") #for block in inode._blocks: # write_out(block, "fsync_blocks") + if "IMAPFS_FSCK_ON_FLUSH" in os.environ: + print "now fscking" + self.fsck() return 0 #@-node:fsync @@ -2543,7 +2565,10 @@ def fsync(self, path, isfsyncfile): def fsyncdir(self, path, isfsyncfile): log_entry("gmailfs.py:Gmailfs:fsyncdir: path=%s, isfsyncfile=%s" % (path, isfsyncfile)) log_info("gmailfs.py:Gmailfs:fsyncdir: path=%s, isfsyncfile=%s" % (path, isfsyncfile)) - return -ENOSYS + if "IMAPFS_FSCK_ON_FLUSH" in os.environ: + print "now fscking" + self.fsck() + return -ENOSYS #@-node:fsync From d91869f6c036c341e210914b744e96547688d813 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 6 Jun 2011 16:12:19 -0700 Subject: [PATCH 12/13] add better debugging message about sleeping --- gmailfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gmailfs.py b/gmailfs.py index 4360b44..8161d56 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -2579,7 +2579,7 @@ def flush(self, path): #write_out(dirent, "flush") #write_out(dirent.inode, "flush") while self.nr_dirty_objects() > 0: - print "there are still dirty objects, sleeping..." + print "flush: there are still %d dirty objects, sleeping..." 
% (self.nr_dirty_objects()) time.sleep(1) if "IMAPFS_FSCK_ON_FLUSH" in os.environ: print "now fscking" From f73118a2ea7d540805bf39d1d154faf8a64c7376 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 28 Nov 2011 09:53:33 -0800 Subject: [PATCH 13/13] fix string.replace This probably never worked. Fix from adam.bernstein@pobox.com: The problem is replace doesn't map a set of characters, just the literal string as the first argument. You need something like translate like the above code. Maybe in your tests, you see the literal sequence "\r\n\t" you want to translate to a " ". Or maybe your replace behaves differently in python 2.7.2+. I don't know. This patch fixes me. If it works for you, then call it good. After making this change, I now have a gmail filesystem mounted. --- gmailfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gmailfs.py b/gmailfs.py index 8161d56..39c8b64 100755 --- a/gmailfs.py +++ b/gmailfs.py @@ -2135,7 +2135,7 @@ def format_dirent_subj(self, str): def parse_dirent_msg(self, msg): subject_re = self.format_dirent_subj('(.*)') - subject = msg['Subject'].replace("\r\n\t", " ") + subject = msg['Subject'].translate(string.maketrans('\r\n\t', ' ')) m = re.match(subject_re, subject) log_debug3("looking for regex: '%s'" % (subject_re)) log_debug3("subject: '%s'" % (subject))