From 8a7c9cef3a2d280d0acdcb19399e0f0d3df54851 Mon Sep 17 00:00:00 2001
From: M Pacer <mpacer@berkeley.edu>
Date: Sat, 3 Mar 2018 17:28:24 -0800
Subject: [PATCH] add pretty _repr_html_ to article to make links when
 displayed in notebook

Add a utils module to hold the dedent function and serve as a home for future utilities
---
 allofplos/article.py | 88 +++++++++++++++++++++++++++++---------------
 allofplos/utils.py   | 27 ++++++++++++++
 2 files changed, 85 insertions(+), 30 deletions(-)
 create mode 100644 allofplos/utils.py
diff --git a/allofplos/article.py b/allofplos/article.py
index 4a62a239..c82172eb 100644
--- a/allofplos/article.py
+++ b/allofplos/article.py
@@ -9,13 +9,14 @@
 
 from . import get_corpus_dir
 from .transformations import (filename_to_doi, _get_base_page, LANDING_PAGE_SUFFIX,
-                              URL_SUFFIX, plos_page_dict, doi_url)
+                              URL_SUFFIX, plos_page_dict, doi_url, doi_to_url, doi_to_path)
 from .plos_regex import validate_doi
 from .elements import (parse_article_date, get_contrib_info,
                        Journal, License, match_contribs_to_dicts)
+from .utils import dedent
 
 
-class Article():
+class Article:
     """The primary object of a PLOS article, initialized by a valid PLOS DOI.
 
     """
@@ -44,6 +45,61 @@ def __init__(self, doi, directory=None):
         self.reset_memoized_attrs()
         self._editor = None
 
+    def __str__(self, exclude_refs=True):
+        """Return the article's local XML file as a pretty-printed string.
+
+        Meant for viewing the XML of a local article; should not be used for hashing.
+        :param exclude_refs: drop the <back> element (which includes the references
+            list) from the printed tree for easier viewing
+        :returns: the XML parsed from ``self.filename``, serialized as unicode text
+        """
+        parser = et.XMLParser(remove_blank_text=True)
+        tree = et.parse(self.filename, parser)
+        if exclude_refs:
+            root = tree.getroot()
+            back = tree.xpath('./back')
+            if back:  # an article without <back> has nothing to strip; avoid IndexError
+                root.remove(back[0])
+        local_xml = et.tostring(tree, method='xml', encoding='unicode',
+                                pretty_print=True)
+        return local_xml
+
+    def __repr__(self):
+        """Summarize the article for interactive display at the prompt.
+
+        Puts the DOI on the first line and the title on the second.
+        :returns: the two-line ``DOI: ...`` / ``Title: ...`` summary
+        :rtype: {str}
+        """
+        summary_fields = (self.doi, self.title)
+        return "DOI: {0}\nTitle: {1}".format(*summary_fields)
+    
+    
+    def _repr_html_(self):
+        """Return the HTML snippet used to display the article in a Jupyter notebook.
+
+        The title links to ``self.page`` and the DOI links to ``self.doi_link()``.
+        Text and URLs are HTML-escaped so ``<``, ``&`` or quotes cannot break the markup.
+        """
+        from html import escape  # imported here so the module-level imports stay unchanged
+        titlestyle = 'display:inline-flex;'
+        titletextstyle = 'margin-left:.5em;'
+        titlelink = ('<span style="{titlestyle}"><a href="{url}">'
+                     '<em>{title}</em></a></span>').format(
+            url=escape(str(self.page)), title=escape(str(self.title)),
+            titlestyle=titlestyle + titletextstyle)
+        doilink = '<span><a href="{url}"><code>{doi}</code></a></span>'.format(
+            url=escape(str(self.doi_link())), doi=escape(str(self.doi)))
+        # <br> is the valid line break (not </br>), and the DOI <span> is closed.
+        out = dedent("""<div>
+        <span style="{titlestyle}">Title: {titlelink}</span><br>
+        <span>DOI: {doilink}</span>
+        </div>
+        """).format(doilink=doilink, titlelink=titlelink, titlestyle=titlestyle)
+
+        return out
+        
+        
     def reset_memoized_attrs(self):
         """Reset attributes to None when instantiating a new article object.
 
@@ -109,34 +165,6 @@ def doi(self, d):
         self.reset_memoized_attrs()
         self._doi = d
 
-    def __str__(self, exclude_refs=True):
-        """Output when you print an article object on the command line.
-
-        For parsing and viewing the XML of a local article. Should not be used for hashing
-        Excludes <back> element (including references list) for easier viewing
-        :param exclude_refs: remove references from the article tree (eases print viewing)
-        """
-        parser = et.XMLParser(remove_blank_text=True)
-        tree = et.parse(self.filename, parser)
-        if exclude_refs:
-            root = tree.getroot()
-            back = tree.xpath('./back')
-            root.remove(back[0])
-        local_xml = et.tostring(tree,
-                                method='xml',
-                                encoding='unicode',
-                                pretty_print=True)
-        return local_xml
-
-    def __repr__(self):
-        """Value of an article object when you call it directly on the command line.
-
-        Shows the DOI and title of the article
-        :returns: DOI and title
-        :rtype: {str}
-        """
-        out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title)
-        return out
 
     def doi_link(self):
         """The link of the DOI, which redirects to the journal URL."""
diff --git a/allofplos/utils.py b/allofplos/utils.py
new file mode 100644
index 00000000..02d1cbb8
--- /dev/null
+++ b/allofplos/utils.py
@@ -0,0 +1,27 @@
+import textwrap
+
+def dedent(text):
+    """Dedent ``text`` like ``textwrap.dedent``, but never let line one set the margin.
+
+    A triple-quoted literal usually starts right after the opening quotes, so
+    its first line has no indentation while the following lines do.  Plain
+    ``textwrap.dedent`` then finds no common margin and strips nothing.  Here
+    the first line is kept as-is and only the remaining lines are dedented:
+
+        dedent('foo\\n    is a bar\\n') == 'foo\\nis a bar\\n'
+
+    Text that begins with a newline, or that contains no newline at all, is
+    handled by plain ``textwrap.dedent``.
+
+    Adapted from ``dedent`` in ipython_genutils' ``text.py``.
+
+    :param text: the string to dedent
+    :returns: the dedented string
+    :rtype: {str}
+    """
+    head, newline, tail = text.partition('\n')
+    if not head or not newline:
+        # Leading blank line, or a single line: nothing special about line one.
+        return textwrap.dedent(text)
+
+    return head + newline + textwrap.dedent(tail)