Sourcery Starbot ⭐ refactored scyclops/Readable-Feeds#1
Sourcery Starbot ⭐ refactored scyclops/Readable-Feeds#1 — SourceryAI wants to merge 1 commit into scyclops:master from
Conversation
SourceryAI
left a comment
There was a problem hiding this comment.
Due to GitHub API limits, only the first 60 comments can be shown.
| if (isinstance(newChild, basestring) | ||
| or isinstance(newChild, unicode)) \ | ||
| and not isinstance(newChild, NavigableString): | ||
| newChild = NavigableString(newChild) | ||
|
|
||
| position = min(position, len(self.contents)) | ||
| if hasattr(newChild, 'parent') and newChild.parent != None: | ||
| # We're 'inserting' an element that's already one | ||
| # of this object's children. | ||
| if newChild.parent == self: | ||
| index = self.find(newChild) | ||
| if index and index < position: | ||
| # Furthermore we're moving it further down the | ||
| # list of this object's children. That means that | ||
| # when we extract this element, our target index | ||
| # will jump down one. | ||
| position = position - 1 | ||
| newChild.extract() | ||
|
|
||
| newChild.parent = self | ||
| previousChild = None | ||
| if position == 0: | ||
| newChild.previousSibling = None | ||
| newChild.previous = self | ||
| else: | ||
| previousChild = self.contents[position-1] | ||
| newChild.previousSibling = previousChild | ||
| newChild.previousSibling.nextSibling = newChild | ||
| newChild.previous = previousChild._lastRecursiveChild() | ||
| if newChild.previous: | ||
| newChild.previous.next = newChild | ||
|
|
||
| newChildsLastElement = newChild._lastRecursiveChild() | ||
|
|
||
| if position >= len(self.contents): | ||
| newChild.nextSibling = None | ||
|
|
||
| parent = self | ||
| parentsNextSibling = None | ||
| while not parentsNextSibling: | ||
| parentsNextSibling = parent.nextSibling | ||
| parent = parent.parent | ||
| if not parent: # This is the last element in the document. | ||
| break | ||
| if parentsNextSibling: | ||
| newChildsLastElement.next = parentsNextSibling | ||
| else: | ||
| newChildsLastElement.next = None | ||
| else: | ||
| nextChild = self.contents[position] | ||
| newChild.nextSibling = nextChild | ||
| if newChild.nextSibling: | ||
| newChild.nextSibling.previousSibling = newChild | ||
| newChildsLastElement.next = nextChild | ||
|
|
||
| if newChildsLastElement.next: | ||
| newChildsLastElement.next.previous = newChildsLastElement | ||
| self.contents.insert(position, newChild) | ||
| if (isinstance( | ||
| newChild, | ||
| (basestring, unicode))) and not isinstance(newChild, NavigableString): | ||
| newChild = NavigableString(newChild) | ||
|
|
||
| position = min(position, len(self.contents)) | ||
| if hasattr(newChild, 'parent') and newChild.parent != None: | ||
| # We're 'inserting' an element that's already one | ||
| # of this object's children. | ||
| if newChild.parent == self: | ||
| index = self.find(newChild) | ||
| if index and index < position: | ||
| # Furthermore we're moving it further down the | ||
| # list of this object's children. That means that | ||
| # when we extract this element, our target index | ||
| # will jump down one. | ||
| position = position - 1 | ||
| newChild.extract() | ||
|
|
||
| newChild.parent = self | ||
| previousChild = None | ||
| if position == 0: | ||
| newChild.previousSibling = None | ||
| newChild.previous = self | ||
| else: | ||
| previousChild = self.contents[position-1] | ||
| newChild.previousSibling = previousChild | ||
| newChild.previousSibling.nextSibling = newChild | ||
| newChild.previous = previousChild._lastRecursiveChild() | ||
| if newChild.previous: | ||
| newChild.previous.next = newChild | ||
|
|
||
| newChildsLastElement = newChild._lastRecursiveChild() | ||
|
|
||
| if position >= len(self.contents): | ||
| newChild.nextSibling = None | ||
|
|
||
| parent = self | ||
| parentsNextSibling = None | ||
| while not parentsNextSibling: | ||
| parentsNextSibling = parent.nextSibling | ||
| parent = parent.parent | ||
| if not parent: # This is the last element in the document. | ||
| break | ||
| newChildsLastElement.next = parentsNextSibling if parentsNextSibling else None | ||
| else: | ||
| nextChild = self.contents[position] | ||
| newChild.nextSibling = nextChild | ||
| if newChild.nextSibling: | ||
| newChild.nextSibling.previousSibling = newChild | ||
| newChildsLastElement.next = nextChild | ||
|
|
||
| if newChildsLastElement.next: | ||
| newChildsLastElement.next.previous = newChildsLastElement | ||
| self.contents.insert(position, newChild) |
There was a problem hiding this comment.
Function PageElement.insert refactored with the following changes:
- Merge isinstance calls (merge-isinstance)
- Replace if statement with if expression (assign-if-exp)
| """Returns the closest parent of this Tag that matches the given | ||
| """Returns the closest parent of this Tag that matches the given | ||
| criteria.""" | ||
| # NOTE: We can't use _findOne because findParents takes a different | ||
| # set of arguments. | ||
| r = None | ||
| l = self.findParents(name, attrs, 1) | ||
| if l: | ||
| r = l[0] | ||
| return r | ||
| return l[0] if (l := self.findParents(name, attrs, 1)) else None |
There was a problem hiding this comment.
Function PageElement.findParent refactored with the following changes:
- Use named expression to simplify assignment and conditional (use-named-expression)
- Move setting of default value for variable into `else` branch (introduce-default-else)
- Replace if statement with if expression (assign-if-exp)
- Inline variable that is immediately returned (inline-immediately-returned-variable)
This removes the following comments (why?):
# NOTE: We can't use _findOne because findParents takes a different
# set of arguments.
| r = None | ||
| l = method(name, attrs, text, 1, **kwargs) | ||
| if l: | ||
| r = l[0] | ||
| return r | ||
| return l[0] if (l := method(name, attrs, text, 1, **kwargs)) else None |
There was a problem hiding this comment.
Function PageElement._findOne refactored with the following changes:
- Use named expression to simplify assignment and conditional (use-named-expression)
- Move setting of default value for variable into `else` branch (introduce-default-else)
- Replace if statement with if expression (assign-if-exp)
- Inline variable that is immediately returned (inline-immediately-returned-variable)
| "Iterates over a generator looking for things that match." | ||
|
|
||
| if isinstance(name, SoupStrainer): | ||
| strainer = name | ||
| else: | ||
| # Build a SoupStrainer | ||
| strainer = SoupStrainer(name, attrs, text, **kwargs) | ||
| results = ResultSet(strainer) | ||
| g = generator() | ||
| while True: | ||
| try: | ||
| i = g.next() | ||
| except StopIteration: | ||
| "Iterates over a generator looking for things that match." | ||
|
|
||
| if isinstance(name, SoupStrainer): | ||
| strainer = name | ||
| else: | ||
| # Build a SoupStrainer | ||
| strainer = SoupStrainer(name, attrs, text, **kwargs) | ||
| results = ResultSet(strainer) | ||
| g = generator() | ||
| while True: | ||
| try: | ||
| i = g.next() | ||
| except StopIteration: | ||
| break | ||
| if i: | ||
| if found := strainer.search(i): | ||
| results.append(found) | ||
| if limit and len(results) >= limit: | ||
| break | ||
| if i: | ||
| found = strainer.search(i) | ||
| if found: | ||
| results.append(found) | ||
| if limit and len(results) >= limit: | ||
| break | ||
| return results | ||
| return results |
There was a problem hiding this comment.
Function PageElement._findAll refactored with the following changes:
- Use named expression to simplify assignment and conditional (use-named-expression)
| """Encodes an object to a string in some encoding, or to Unicode. | ||
| """Encodes an object to a string in some encoding, or to Unicode. | ||
| .""" | ||
| if isinstance(s, unicode): | ||
| if encoding: | ||
| s = s.encode(encoding) | ||
| elif isinstance(s, str): | ||
| if encoding: | ||
| s = s.encode(encoding) | ||
| else: | ||
| s = unicode(s) | ||
| else: | ||
| if encoding: | ||
| s = self.toEncoding(str(s), encoding) | ||
| else: | ||
| s = unicode(s) | ||
| return s | ||
| if (isinstance(s, unicode) and encoding | ||
| or not isinstance(s, unicode) and isinstance(s, str) and encoding): | ||
| s = s.encode(encoding) | ||
| elif isinstance(s, unicode): | ||
| pass | ||
| elif isinstance(s, str) or not encoding: | ||
| s = unicode(s) | ||
| else: | ||
| s = self.toEncoding(str(s), encoding) | ||
| return s |
There was a problem hiding this comment.
Function PageElement.toEncoding refactored with the following changes:
- Merge duplicate blocks in conditional (merge-duplicate-blocks)
- Remove redundant conditional [×2] (remove-redundant-if)
| """Returns true iff this tag has the same name, the same attributes, | ||
| """Returns true iff this tag has the same name, the same attributes, | ||
| and the same contents (recursively) as the given tag. | ||
|
|
||
| NOTE: right now this will return false if two tags have the | ||
| same attributes in a different order. Should this be fixed?""" | ||
| if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): | ||
| return False | ||
| for i in range(0, len(self.contents)): | ||
| if self.contents[i] != other.contents[i]: | ||
| return False | ||
| return True | ||
| if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): | ||
| return False | ||
| return all(self.contents[i] == other.contents[i] | ||
| for i in range(len(self.contents))) |
There was a problem hiding this comment.
Function Tag.__eq__ refactored with the following changes:
- Use any() instead of for loop (use-any)
- Replace range(0, x) with range(x) (remove-zero-from-range)
- Invert any/all to simplify comparisons (invert-any-all)
| """Used with a regular expression to substitute the | ||
| """Used with a regular expression to substitute the | ||
| appropriate XML entity for an XML special character.""" | ||
| return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" | ||
| return f"&{self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]]};" |
There was a problem hiding this comment.
Function Tag._sub_entity refactored with the following changes:
- Use f-string instead of string concatenation [×2] (use-fstring-for-concatenation)
| """Returns a string or Unicode representation of this tag and | ||
| """Returns a string or Unicode representation of this tag and | ||
| its contents. To get Unicode, pass None for encoding. | ||
|
|
||
| NOTE: since Python's HTML parser consumes whitespace, this | ||
| method is not certain to reproduce the whitespace present in | ||
| the original string.""" | ||
|
|
||
| encodedName = self.toEncoding(self.name, encoding) | ||
|
|
||
| attrs = [] | ||
| if self.attrs: | ||
| for key, val in self.attrs: | ||
| fmt = '%s="%s"' | ||
| if isString(val): | ||
| if self.containsSubstitutions and '%SOUP-ENCODING%' in val: | ||
| val = self.substituteEncoding(val, encoding) | ||
|
|
||
| # The attribute value either: | ||
| # | ||
| # * Contains no embedded double quotes or single quotes. | ||
| # No problem: we enclose it in double quotes. | ||
| # * Contains embedded single quotes. No problem: | ||
| # double quotes work here too. | ||
| # * Contains embedded double quotes. No problem: | ||
| # we enclose it in single quotes. | ||
| # * Embeds both single _and_ double quotes. This | ||
| # can't happen naturally, but it can happen if | ||
| # you modify an attribute value after parsing | ||
| # the document. Now we have a bit of a | ||
| # problem. We solve it by enclosing the | ||
| # attribute in single quotes, and escaping any | ||
| # embedded single quotes to XML entities. | ||
| if '"' in val: | ||
| fmt = "%s='%s'" | ||
| if "'" in val: | ||
| # TODO: replace with apos when | ||
| # appropriate. | ||
| val = val.replace("'", "&squot;") | ||
|
|
||
| # Now we're okay w/r/t quotes. But the attribute | ||
| # value might also contain angle brackets, or | ||
| # ampersands that aren't part of entities. We need | ||
| # to escape those to XML entities too. | ||
| val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) | ||
|
|
||
| attrs.append(fmt % (self.toEncoding(key, encoding), | ||
| self.toEncoding(val, encoding))) | ||
| close = '' | ||
| closeTag = '' | ||
| if self.isSelfClosing: | ||
| close = ' /' | ||
| else: | ||
| closeTag = '</%s>' % encodedName | ||
|
|
||
| indentTag, indentContents = 0, 0 | ||
| if prettyPrint: | ||
| indentTag = indentLevel | ||
| space = (' ' * (indentTag-1)) | ||
| indentContents = indentTag + 1 | ||
| contents = self.renderContents(encoding, prettyPrint, indentContents) | ||
| if self.hidden: | ||
| s = contents | ||
| else: | ||
| s = [] | ||
| attributeString = '' | ||
| if attrs: | ||
| attributeString = ' ' + ' '.join(attrs) | ||
| if prettyPrint: | ||
| s.append(space) | ||
| s.append('<%s%s%s>' % (encodedName, attributeString, close)) | ||
| if prettyPrint: | ||
| s.append("\n") | ||
| s.append(contents) | ||
| if prettyPrint and contents and contents[-1] != "\n": | ||
| s.append("\n") | ||
| if prettyPrint and closeTag: | ||
| s.append(space) | ||
| s.append(closeTag) | ||
| if prettyPrint and closeTag and self.nextSibling: | ||
| s.append("\n") | ||
| s = ''.join(s) | ||
| return s | ||
| encodedName = self.toEncoding(self.name, encoding) | ||
|
|
||
| attrs = [] | ||
| if self.attrs: | ||
| for key, val in self.attrs: | ||
| fmt = '%s="%s"' | ||
| if isString(val): | ||
| if self.containsSubstitutions and '%SOUP-ENCODING%' in val: | ||
| val = self.substituteEncoding(val, encoding) | ||
|
|
||
| # The attribute value either: | ||
| # | ||
| # * Contains no embedded double quotes or single quotes. | ||
| # No problem: we enclose it in double quotes. | ||
| # * Contains embedded single quotes. No problem: | ||
| # double quotes work here too. | ||
| # * Contains embedded double quotes. No problem: | ||
| # we enclose it in single quotes. | ||
| # * Embeds both single _and_ double quotes. This | ||
| # can't happen naturally, but it can happen if | ||
| # you modify an attribute value after parsing | ||
| # the document. Now we have a bit of a | ||
| # problem. We solve it by enclosing the | ||
| # attribute in single quotes, and escaping any | ||
| # embedded single quotes to XML entities. | ||
| if '"' in val: | ||
| fmt = "%s='%s'" | ||
| if "'" in val: | ||
| # TODO: replace with apos when | ||
| # appropriate. | ||
| val = val.replace("'", "&squot;") | ||
|
|
||
| # Now we're okay w/r/t quotes. But the attribute | ||
| # value might also contain angle brackets, or | ||
| # ampersands that aren't part of entities. We need | ||
| # to escape those to XML entities too. | ||
| val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) | ||
|
|
||
| attrs.append(fmt % (self.toEncoding(key, encoding), | ||
| self.toEncoding(val, encoding))) | ||
| close = '' | ||
| closeTag = '' | ||
| if self.isSelfClosing: | ||
| close = ' /' | ||
| else: | ||
| closeTag = f'</{encodedName}>' | ||
|
|
||
| indentTag, indentContents = 0, 0 | ||
| if prettyPrint: | ||
| indentTag = indentLevel | ||
| space = (' ' * (indentTag-1)) | ||
| indentContents = indentTag + 1 | ||
| contents = self.renderContents(encoding, prettyPrint, indentContents) | ||
| if self.hidden: | ||
| return contents | ||
| s = [] | ||
| attributeString = ' ' + ' '.join(attrs) if attrs else '' | ||
| if prettyPrint: | ||
| s.append(space) | ||
| s.append(f'<{encodedName}{attributeString}{close}>') | ||
| if prettyPrint: | ||
| s.append("\n") | ||
| s.append(contents) | ||
| if prettyPrint: | ||
| if contents and contents[-1] != "\n": | ||
| s.append("\n") | ||
| if closeTag: | ||
| s.append(space) | ||
| s.append(closeTag) | ||
| if prettyPrint and closeTag and self.nextSibling: | ||
| s.append("\n") | ||
| return ''.join(s) |
There was a problem hiding this comment.
Function Tag.__str__ refactored with the following changes:
- Replace interpolated string formatting with f-string [×2] (replace-interpolation-with-fstring)
- Lift return into if (lift-return-into-if)
- Remove unnecessary else after guard condition (remove-unnecessary-else)
- Hoist a repeated condition into a parent condition (hoist-repeated-if-condition)
- Move setting of default value for variable into `else` branch (introduce-default-else)
- Replace if statement with if expression (assign-if-exp)
| """Recursively destroys the contents of this tree.""" | ||
| contents = [i for i in self.contents] | ||
| for i in contents: | ||
| if isinstance(i, Tag): | ||
| i.decompose() | ||
| else: | ||
| i.extract() | ||
| self.extract() | ||
| """Recursively destroys the contents of this tree.""" | ||
| contents = list(self.contents) | ||
| for i in contents: | ||
| if isinstance(i, Tag): | ||
| i.decompose() | ||
| else: | ||
| i.extract() | ||
| self.extract() |
There was a problem hiding this comment.
Function Tag.decompose refactored with the following changes:
- Replace identity comprehension with call to collection constructor (identity-comprehension)
| """Return only the first child of this Tag matching the given | ||
| """Return only the first child of this Tag matching the given | ||
| criteria.""" | ||
| r = None | ||
| l = self.findAll(name, attrs, recursive, text, 1, **kwargs) | ||
| if l: | ||
| r = l[0] | ||
| return r | ||
| return (l[0] if (l := self.findAll(name, attrs, recursive, text, 1, ** | ||
| kwargs)) else None) |
There was a problem hiding this comment.
Function Tag.find refactored with the following changes:
- Use named expression to simplify assignment and conditional (use-named-expression)
- Move setting of default value for variable into `else` branch (introduce-default-else)
- Replace if statement with if expression (assign-if-exp)
- Inline variable that is immediately returned (inline-immediately-returned-variable)
| """Extracts a list of Tag objects that match the given | ||
| """Extracts a list of Tag objects that match the given |
There was a problem hiding this comment.
Function Tag.findAll refactored with the following changes:
- Move setting of default value for variable into `else` branch (introduce-default-else)
- Swap if/else branches of if expression to remove negation (swap-if-expression)
- Replace if statement with if expression (assign-if-exp)
| """Initializes a map representation of this tag's attributes, | ||
| """Initializes a map representation of this tag's attributes, | ||
| if not already initialized.""" | ||
| if not getattr(self, 'attrMap'): | ||
| self.attrMap = {} | ||
| for (key, value) in self.attrs: | ||
| self.attrMap[key] = value | ||
| return self.attrMap | ||
| if not getattr(self, 'attrMap'): | ||
| self.attrMap = dict(self.attrs) | ||
| return self.attrMap |
There was a problem hiding this comment.
Function Tag._getAttrMap refactored with the following changes:
- Convert for loop into dictionary comprehension (dict-comprehension)
- Replace identity comprehension with call to collection constructor (identity-comprehension)
| for i in range(0, len(self.contents)): | ||
| yield self.contents[i] | ||
| raise StopIteration | ||
| for i in range(len(self.contents)): | ||
| yield self.contents[i] | ||
| raise StopIteration |
There was a problem hiding this comment.
Function Tag.childGenerator refactored with the following changes:
- Replace range(0, x) with range(x) (remove-zero-from-range)
| if self.text: | ||
| return self.text | ||
| else: | ||
| return "%s|%s" % (self.name, self.attrs) | ||
| return self.text if self.text else f"{self.name}|{self.attrs}" |
There was a problem hiding this comment.
Function SoupStrainer.__str__ refactored with the following changes:
- Replace if statement with if expression (assign-if-exp)
- Replace interpolated string formatting with f-string (replace-interpolation-with-fstring)
| found = None | ||
| markup = None | ||
| if isinstance(markupName, Tag): | ||
| markup = markupName | ||
| markupAttrs = markup | ||
| callFunctionWithTagData = callable(self.name) \ | ||
| and not isinstance(markupName, Tag) | ||
|
|
||
| if (not self.name) \ | ||
| or callFunctionWithTagData \ | ||
| or (markup and self._matches(markup, self.name)) \ | ||
| or (not markup and self._matches(markupName, self.name)): | ||
| if callFunctionWithTagData: | ||
| match = self.name(markupName, markupAttrs) | ||
| else: | ||
| match = True | ||
| markupAttrMap = None | ||
| for attr, matchAgainst in self.attrs.items(): | ||
| if not markupAttrMap: | ||
| if hasattr(markupAttrs, 'get'): | ||
| markupAttrMap = markupAttrs | ||
| else: | ||
| markupAttrMap = {} | ||
| for k,v in markupAttrs: | ||
| markupAttrMap[k] = v | ||
| attrValue = markupAttrMap.get(attr) | ||
| if not self._matches(attrValue, matchAgainst): | ||
| match = False | ||
| break | ||
| if match: | ||
| if markup: | ||
| found = markup | ||
| else: | ||
| found = markupName | ||
| return found | ||
| found = None | ||
| markup = None | ||
| if isinstance(markupName, Tag): | ||
| markup = markupName | ||
| markupAttrs = markup | ||
| callFunctionWithTagData = callable(self.name) \ | ||
| and not isinstance(markupName, Tag) | ||
|
|
||
| if (not self.name) \ | ||
| or callFunctionWithTagData \ | ||
| or (markup and self._matches(markup, self.name)) \ | ||
| or (not markup and self._matches(markupName, self.name)): | ||
| if callFunctionWithTagData: | ||
| match = self.name(markupName, markupAttrs) | ||
| else: | ||
| match = True | ||
| markupAttrMap = None | ||
| for attr, matchAgainst in self.attrs.items(): | ||
| if not markupAttrMap: | ||
| if hasattr(markupAttrs, 'get'): | ||
| markupAttrMap = markupAttrs | ||
| else: | ||
| markupAttrMap = dict(markupAttrs) | ||
| attrValue = markupAttrMap.get(attr) | ||
| if not self._matches(attrValue, matchAgainst): | ||
| match = False | ||
| break | ||
| if match: | ||
| found = markup if markup else markupName | ||
| return found |
There was a problem hiding this comment.
Function SoupStrainer.searchTag refactored with the following changes:
- Swap positions of nested conditionals [×2] (swap-nested-ifs)
- Hoist nested repeated code outside conditional statements [×2] (hoist-similar-statement-from-if)
- Convert for loop into dictionary comprehension (dict-comprehension)
- Replace if statement with if expression (assign-if-exp)
- Replace identity comprehension with call to collection constructor (identity-comprehension)
| """Changes a MS smart quote character to an XML or HTML | ||
| """Changes a MS smart quote character to an XML or HTML | ||
| entity.""" | ||
| sub = self.MS_CHARS.get(orig) | ||
| if type(sub) == types.TupleType: | ||
| if self.smartQuotesTo == 'xml': | ||
| sub = '&#x%s;' % sub[1] | ||
| else: | ||
| sub = '&%s;' % sub[0] | ||
| return sub | ||
| sub = self.MS_CHARS.get(orig) | ||
| if type(sub) == types.TupleType: | ||
| sub = f'&#x{sub[1]};' if self.smartQuotesTo == 'xml' else f'&{sub[0]};' | ||
| return sub |
There was a problem hiding this comment.
Function UnicodeDammit._subMSChar refactored with the following changes:
- Replace if statement with if expression (assign-if-exp)
- Replace interpolated string formatting with f-string [×2] (replace-interpolation-with-fstring)
| '''Given a string and its encoding, decodes the string into Unicode. | ||
| '''Given a string and its encoding, decodes the string into Unicode. | ||
| %encoding is a string recognized by encodings.aliases''' | ||
|
|
||
| # strip Byte Order Mark (if present) | ||
| if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ | ||
| and (data[2:4] != '\x00\x00'): | ||
| encoding = 'utf-16be' | ||
| data = data[2:] | ||
| elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ | ||
| # strip Byte Order Mark (if present) | ||
| if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ | ||
| and (data[2:4] != '\x00\x00'): | ||
| encoding = 'utf-16le' | ||
| data = data[2:] | ||
| elif data[:3] == '\xef\xbb\xbf': | ||
| encoding = 'utf-8' | ||
| data = data[3:] | ||
| elif data[:4] == '\x00\x00\xfe\xff': | ||
| encoding = 'utf-32be' | ||
| data = data[4:] | ||
| elif data[:4] == '\xff\xfe\x00\x00': | ||
| encoding = 'utf-32le' | ||
| data = data[4:] | ||
| newdata = unicode(data, encoding) | ||
| return newdata | ||
| encoding = 'utf-16be' | ||
| data = data[2:] | ||
| elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ | ||
| and (data[2:4] != '\x00\x00'): | ||
| encoding = 'utf-16le' | ||
| data = data[2:] | ||
| elif data[:3] == '\xef\xbb\xbf': | ||
| encoding = 'utf-8' | ||
| data = data[3:] | ||
| elif data[:4] == '\x00\x00\xfe\xff': | ||
| encoding = 'utf-32be' | ||
| data = data[4:] | ||
| elif data[:4] == '\xff\xfe\x00\x00': | ||
| encoding = 'utf-32le' | ||
| data = data[4:] | ||
| return unicode(data, encoding) |
There was a problem hiding this comment.
Function UnicodeDammit._toUnicode refactored with the following changes:
- Inline variable that is immediately returned (inline-immediately-returned-variable)
| """Given a document, tries to detect its XML encoding.""" | ||
| xml_encoding = sniffed_xml_encoding = None | ||
| try: | ||
| if xml_data[:4] == '\x4c\x6f\xa7\x94': | ||
| # EBCDIC | ||
| xml_data = self._ebcdic_to_ascii(xml_data) | ||
| elif xml_data[:4] == '\x00\x3c\x00\x3f': | ||
| # UTF-16BE | ||
| sniffed_xml_encoding = 'utf-16be' | ||
| xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') | ||
| elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ | ||
| """Given a document, tries to detect its XML encoding.""" | ||
| xml_encoding = sniffed_xml_encoding = None | ||
| try: | ||
| if xml_data[:4] == '\x4c\x6f\xa7\x94': | ||
| # EBCDIC | ||
| xml_data = self._ebcdic_to_ascii(xml_data) | ||
| elif xml_data[:4] == '\x00\x3c\x00\x3f': | ||
| # UTF-16BE | ||
| sniffed_xml_encoding = 'utf-16be' | ||
| xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') | ||
| elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ | ||
| and (xml_data[2:4] != '\x00\x00'): | ||
| # UTF-16BE with BOM | ||
| sniffed_xml_encoding = 'utf-16be' | ||
| xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') | ||
| elif xml_data[:4] == '\x3c\x00\x3f\x00': | ||
| # UTF-16LE | ||
| sniffed_xml_encoding = 'utf-16le' | ||
| xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') | ||
| elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ | ||
| # UTF-16BE with BOM | ||
| sniffed_xml_encoding = 'utf-16be' | ||
| xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') | ||
| elif xml_data[:4] == '\x3c\x00\x3f\x00': | ||
| # UTF-16LE | ||
| sniffed_xml_encoding = 'utf-16le' | ||
| xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') | ||
| elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ | ||
| (xml_data[2:4] != '\x00\x00'): | ||
| # UTF-16LE with BOM | ||
| sniffed_xml_encoding = 'utf-16le' | ||
| xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') | ||
| elif xml_data[:4] == '\x00\x00\x00\x3c': | ||
| # UTF-32BE | ||
| sniffed_xml_encoding = 'utf-32be' | ||
| xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') | ||
| elif xml_data[:4] == '\x3c\x00\x00\x00': | ||
| # UTF-32LE | ||
| sniffed_xml_encoding = 'utf-32le' | ||
| xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') | ||
| elif xml_data[:4] == '\x00\x00\xfe\xff': | ||
| # UTF-32BE with BOM | ||
| sniffed_xml_encoding = 'utf-32be' | ||
| xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') | ||
| elif xml_data[:4] == '\xff\xfe\x00\x00': | ||
| # UTF-32LE with BOM | ||
| sniffed_xml_encoding = 'utf-32le' | ||
| xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') | ||
| elif xml_data[:3] == '\xef\xbb\xbf': | ||
| # UTF-8 with BOM | ||
| sniffed_xml_encoding = 'utf-8' | ||
| xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') | ||
| else: | ||
| sniffed_xml_encoding = 'ascii' | ||
| pass | ||
| except: | ||
| xml_encoding_match = None | ||
| xml_encoding_match = re.compile( | ||
| '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) | ||
| if not xml_encoding_match and isHTML: | ||
| regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) | ||
| xml_encoding_match = regexp.search(xml_data) | ||
| if xml_encoding_match is not None: | ||
| xml_encoding = xml_encoding_match.groups()[0].lower() | ||
| if isHTML: | ||
| self.declaredHTMLEncoding = xml_encoding | ||
| if sniffed_xml_encoding and \ | ||
| (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', | ||
| 'iso-10646-ucs-4', 'ucs-4', 'csucs4', | ||
| 'utf-16', 'utf-32', 'utf_16', 'utf_32', | ||
| 'utf16', 'u16')): | ||
| xml_encoding = sniffed_xml_encoding | ||
| return xml_data, xml_encoding, sniffed_xml_encoding | ||
| # UTF-16LE with BOM | ||
| sniffed_xml_encoding = 'utf-16le' | ||
| xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') | ||
| elif xml_data[:4] == '\x00\x00\x00\x3c': | ||
| # UTF-32BE | ||
| sniffed_xml_encoding = 'utf-32be' | ||
| xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') | ||
| elif xml_data[:4] == '\x3c\x00\x00\x00': | ||
| # UTF-32LE | ||
| sniffed_xml_encoding = 'utf-32le' | ||
| xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') | ||
| elif xml_data[:4] == '\x00\x00\xfe\xff': | ||
| # UTF-32BE with BOM | ||
| sniffed_xml_encoding = 'utf-32be' | ||
| xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') | ||
| elif xml_data[:4] == '\xff\xfe\x00\x00': | ||
| # UTF-32LE with BOM | ||
| sniffed_xml_encoding = 'utf-32le' | ||
| xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') | ||
| elif xml_data[:3] == '\xef\xbb\xbf': | ||
| # UTF-8 with BOM | ||
| sniffed_xml_encoding = 'utf-8' | ||
| xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') | ||
| else: | ||
| sniffed_xml_encoding = 'ascii' | ||
| except: | ||
| xml_encoding_match = None | ||
| xml_encoding_match = re.compile( | ||
| '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) | ||
| if not xml_encoding_match and isHTML: | ||
| regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) | ||
| xml_encoding_match = regexp.search(xml_data) | ||
| if xml_encoding_match is not None: | ||
| xml_encoding = xml_encoding_match.groups()[0].lower() | ||
| if isHTML: | ||
| self.declaredHTMLEncoding = xml_encoding | ||
| if sniffed_xml_encoding and \ | ||
| (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', | ||
| 'iso-10646-ucs-4', 'ucs-4', 'csucs4', | ||
| 'utf-16', 'utf-32', 'utf_16', 'utf_32', | ||
| 'utf16', 'u16')): | ||
| xml_encoding = sniffed_xml_encoding | ||
| return xml_data, xml_encoding, sniffed_xml_encoding |
There was a problem hiding this comment.
Function UnicodeDammit._detectEncoding refactored with the following changes:
- Remove redundant pass statement (remove-redundant-pass)
| if "" == translate(str, idmap, LegalChars): | ||
| if translate(str, idmap, LegalChars) == "": | ||
| return str | ||
| else: | ||
| return '"' + _nulljoin( map(_Translator.get, str, str) ) + '"' | ||
| return f'"{_nulljoin(map(_Translator.get, str, str))}"' |
There was a problem hiding this comment.
Function _quote refactored with the following changes:
- Ensure constant in comparison is on the right (flip-comparison)
- Use f-string instead of string concatenation [×2] (use-fstring-for-concatenation)
| res.append(str[i:k]) | ||
| res.append(str[k+1]) | ||
| res.extend((str[i:k], str[k+1])) | ||
| i = k+2 | ||
| else: # OctalPatt matched | ||
| res.append(str[i:j]) | ||
| res.append( chr( int(str[j+1:j+4], 8) ) ) | ||
| else: # OctalPatt matched | ||
| res.extend((str[i:j], chr( int(str[j+1:j+4], 8) ))) |
There was a problem hiding this comment.
Function _unquote refactored with the following changes:
- Merge consecutive list appends into a single extend [×2] (merge-list-appends-into-extend)
| if not K in self._reserved: | ||
| raise CookieError("Invalid Attribute %s" % K) | ||
| if K not in self._reserved: | ||
| raise CookieError(f"Invalid Attribute {K}") |
There was a problem hiding this comment.
Function Morsel.__setitem__ refactored with the following changes:
- Simplify logical expression using De Morgan identities (de-morgan)
- Replace interpolated string formatting with f-string (replace-interpolation-with-fstring)
| raise CookieError("Illegal key value: %s" % key) | ||
| raise CookieError(f"Attempt to set a reserved key: {key}") | ||
| if translate(key, idmap, LegalChars) != "": | ||
| raise CookieError(f"Illegal key value: {key}") |
There was a problem hiding this comment.
Function Morsel.set refactored with the following changes:
- Replace interpolated string formatting with f-string [×2] (replace-interpolation-with-fstring)
- Ensure constant in comparison is on the right (flip-comparison)
| return "%s %s" % ( header, self.OutputString(attrs) ) | ||
| return f"{header} {self.OutputString(attrs)}" |
There was a problem hiding this comment.
Function Morsel.output refactored with the following changes:
- Replace interpolated string formatting with f-string (replace-interpolation-with-fstring)
| return '<%s: %s=%s>' % (self.__class__.__name__, | ||
| self.key, repr(self.value) ) | ||
| return f'<{self.__class__.__name__}: {self.key}={repr(self.value)}>' |
There was a problem hiding this comment.
Function Morsel.__repr__ refactored with the following changes:
- Replace interpolated string formatting with f-string (replace-interpolation-with-fstring)
| RA("%s=%s" % (self.key, self.coded_value)) | ||
| RA(f"{self.key}={self.coded_value}") |
There was a problem hiding this comment.
Function Morsel.OutputString refactored with the following changes:
- Replace interpolated string formatting with f-string [×3] (replace-interpolation-with-fstring)
| if self.contentparams['type'].endswith('/xml'): | ||
| return 0 | ||
| return 1 | ||
| return 0 if self.contentparams['type'].endswith('/xml') else 1 |
There was a problem hiding this comment.
Function _FeedParserMixin._isBase64 refactored with the following changes:
- Lift code into else after jump in control flow (reintroduce-else)
- Replace if statement with if expression (assign-if-exp)
| href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) | ||
| if href: | ||
| if href := attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))): |
There was a problem hiding this comment.
Function _FeedParserMixin._itsAnHrefDamnIt refactored with the following changes:
- Use named expression to simplify assignment and conditional (use-named-expression)
| versionmap = {'0.91': 'rss091u', | ||
| '0.92': 'rss092', | ||
| '0.93': 'rss093', | ||
| '0.94': 'rss094'} | ||
| if not self.version: | ||
| attr_version = attrsD.get('version', '') | ||
| version = versionmap.get(attr_version) | ||
| if version: | ||
| versionmap = {'0.91': 'rss091u', | ||
| '0.92': 'rss092', | ||
| '0.93': 'rss093', | ||
| '0.94': 'rss094'} | ||
| if version := versionmap.get(attr_version): |
There was a problem hiding this comment.
Function _FeedParserMixin._start_rss refactored with the following changes:
- Move assignments closer to their usage (move-assign)
- Use named expression to simplify assignment and conditional (use-named-expression)
| versionmap = {'0.1': 'atom01', | ||
| '0.2': 'atom02', | ||
| '0.3': 'atom03'} | ||
| if not self.version: | ||
| attr_version = attrsD.get('version') | ||
| version = versionmap.get(attr_version) | ||
| if version: | ||
| self.version = version | ||
| else: | ||
| self.version = 'atom' | ||
| versionmap = {'0.1': 'atom01', | ||
| '0.2': 'atom02', | ||
| '0.3': 'atom03'} | ||
| self.version = version if (version := versionmap.get(attr_version)) else 'atom' |
There was a problem hiding this comment.
Function _FeedParserMixin._start_feed refactored with the following changes:
- Move assignments closer to their usage (move-assign)
- Use named expression to simplify assignment and conditional (use-named-expression)
- Replace if statement with if expression (assign-if-exp)
| context = self.sourcedata | ||
| return self.sourcedata | ||
| elif self.inentry: | ||
| context = self.entries[-1] | ||
| return self.entries[-1] | ||
| else: | ||
| context = self.feeddata | ||
| return context | ||
| return self.feeddata |
There was a problem hiding this comment.
Function _FeedParserMixin._getContext refactored with the following changes:
- Lift return into if (lift-return-into-if)
Thanks for starring sourcery-ai/sourcery ✨ 🌟 ✨
Here's your pull request refactoring your most popular Python repo.
If you want Sourcery to refactor all your Python repos and incoming pull requests, install our bot.
Review changes via command line
To manually merge these changes, make sure you're on the `master` branch, then run: