diff --git a/allofplos/article.py b/allofplos/article.py index 4a62a239..c82172eb 100644 --- a/allofplos/article.py +++ b/allofplos/article.py @@ -9,13 +9,14 @@ from . import get_corpus_dir from .transformations import (filename_to_doi, _get_base_page, LANDING_PAGE_SUFFIX, - URL_SUFFIX, plos_page_dict, doi_url) + URL_SUFFIX, plos_page_dict, doi_url, doi_to_url, doi_to_path) from .plos_regex import validate_doi from .elements import (parse_article_date, get_contrib_info, Journal, License, match_contribs_to_dicts) +from .utils import dedent -class Article(): +class Article: """The primary object of a PLOS article, initialized by a valid PLOS DOI. """ @@ -44,6 +45,61 @@ def __init__(self, doi, directory=None): self.reset_memoized_attrs() self._editor = None + def __str__(self, exclude_refs=True): + """Output when you print an article object on the command line. + + For parsing and viewing the XML of a local article. Should not be used for hashing + Excludes element (including references list) for easier viewing + :param exclude_refs: remove references from the article tree (eases print viewing) + """ + parser = et.XMLParser(remove_blank_text=True) + tree = et.parse(self.filename, parser) + if exclude_refs: + root = tree.getroot() + back = tree.xpath('./back') + root.remove(back[0]) + local_xml = et.tostring(tree, + method='xml', + encoding='unicode', + pretty_print=True) + return local_xml + + def __repr__(self): + """Value of an article object when you call it directly on the command line. + + Shows the DOI and title of the article + :returns: DOI and title + :rtype: {str} + """ + out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title) + return out + + + def _repr_html_(self): + """Nice display for Jupyter notebook""" + + titlestyle = 'display:inline-flex;' + titletextstyle = 'margin-left:.5em;' + titlelink = ('' + '{title}').format( + url=self.page, + title=self.title, + titlestyle=titlestyle+titletextstyle, + ) + + doilink = '{doi}'.format( + url=self.doi_link(), + doi=self.doi, + ) + out = dedent("""
+ Title: {titlelink}
+ DOI: {doilink} +
+ """).format(doilink=doilink, titlelink=titlelink, titlestyle=titlestyle) + + return out + + def reset_memoized_attrs(self): """Reset attributes to None when instantiating a new article object. @@ -109,34 +165,6 @@ def doi(self, d): self.reset_memoized_attrs() self._doi = d - def __str__(self, exclude_refs=True): - """Output when you print an article object on the command line. - - For parsing and viewing the XML of a local article. Should not be used for hashing - Excludes element (including references list) for easier viewing - :param exclude_refs: remove references from the article tree (eases print viewing) - """ - parser = et.XMLParser(remove_blank_text=True) - tree = et.parse(self.filename, parser) - if exclude_refs: - root = tree.getroot() - back = tree.xpath('./back') - root.remove(back[0]) - local_xml = et.tostring(tree, - method='xml', - encoding='unicode', - pretty_print=True) - return local_xml - - def __repr__(self): - """Value of an article object when you call it directly on the command line. - - Shows the DOI and title of the article - :returns: DOI and title - :rtype: {str} - """ - out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title) - return out def doi_link(self): """The link of the DOI, which redirects to the journal URL.""" diff --git a/allofplos/utils.py b/allofplos/utils.py new file mode 100644 index 00000000..02d1cbb8 --- /dev/null +++ b/allofplos/utils.py @@ -0,0 +1,27 @@ +import textwrap + +def dedent(text): + """Equivalent of textwrap.dedent that ignores unindented first line. + This means it will still dedent strings like: + '''foo + is a bar + ''' + For use in wrap_paragraphs. + + Taken from https://github.com/ipython/ipython_genutils/text.py + """ + + if text.startswith('\n'): + # text starts with blank line, don't ignore the first line + return textwrap.dedent(text) + + # split first line + splits = text.split('\n',1) + if len(splits) == 1: + # only one line + return textwrap.dedent(text) + + first, rest = splits + # dedent everything but the first line + rest = textwrap.dedent(rest) + return '\n'.join([first, rest])