# Source code for epsman.repo.nbHeaderPost

"""
epsman

Local python script for Notebook header read & write.

Can be called from Fabric for remote run case, only requires standard libs + nbformat for notebook IO.

May be a better way to do this?

16/12/19    v1

TO DO:
- Loop over list of local files? Currently need to call per file.

"""

import nbformat
import sys
import os
from pathlib import Path
from datetime import datetime

# Settings
# Notebook format version requested when the notebook is read (passed as
# ``as_version`` to nbformat.read in the main section below).
nbVersion = 4  # May not be required?

# Get info from notebook file.
def getInfo(inputNB):
    """
    Grab the job summary lines from a notebook.

    Template specific: reads the text of the first output of the cell at
    index 9, splits it on newlines and keeps lines 2-7. The lines are also
    printed to the terminal, which can be used to pass info back via a Fabric
    connection object when run remotely.

    Parameters
    ----------
    inputNB : mapping
        Notebook object supporting ``inputNB['cells'][i]['outputs'][j]['text']``
        style access (e.g. as returned by ``nbformat.read``).

    Returns
    -------
    list of str or None
        Job summary lines, or None if the expected cell/output is missing -
        this likely indicates an aborted ePS job.
    """

    # TODO: add search routine here.
    # TODO: add additional job info - currently not included since it needs to be correctly propagated.
    try:
        jobInfo = inputNB['cells'][9]['outputs'][0]['text'].split('\n')[2:8]  # Job summary output
    except (KeyError, IndexError):
        # KeyError: cell/output lacks the expected field.
        # IndexError: notebook has too few cells, or the cell has no outputs.
        jobInfo = None
        print('***Missing jobInfo')
    else:
        print(*jobInfo, sep='\n')  # Print info to terminal.

    return jobInfo


# Define header info
# Note on badges: these appear centered in Jupyter viewer, so put at head and tail of page only.
# Left aligned in HTML via nbSphinx.
# Don't include in nbSphinx header at HTML gen time, since this is missing DOI info.
def constructHeader(jobInfo, fileIn, title, doi = None):
    """
    Build the Markdown header text for a notebook.

    Parameters
    ----------
    jobInfo : list of str
        Job summary lines. The first four are listed under "Job details"; the
        last whitespace-separated token of the final line is taken as the
        electronic structure file path (with a trailing quote character).
    fileIn : str or Path
        Notebook file, assumed to follow a ``molecule/notebook`` layout which
        is mirrored in the web URL.
    title : str
        Job title, written after the 'ePSproc: ' heading prefix.
    doi : str, optional
        Dataset DOI, e.g. 10.5281/zenodo.3600654 (which corresponds to
        https://zenodo.org/record/3600654). If None, the DOI badge and the
        Dataset/DOI bullet points are left out.

    Returns
    -------
    str
        Markdown source for the header cell.
    """

    # Ensure fileIn is Path object
    fileIn = Path(fileIn)

    # Set webroot assuming molecule/notebook format for both fileIn and web.
    webURL = f"https://phockett.github.io/ePSdata/{fileIn.parts[-2]}/{fileIn.stem}.html"

    # Creative Commons licensing, Markdown version (raw HTML doesn't pass through nbSphinx).
    ccText = 'Licensed under [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/)'
    ccBadge = '[![Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)](https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png)](https://creativecommons.org/licenses/by-nc-sa/4.0/)'

    # Electronic structure file name; [0:-1] drops the trailing quote character.
    eStructFile = Path(jobInfo[-1].split()[-1]).name[0:-1]

    details = ['*electronic structure input*: ' + eStructFile,
               '*ePS output file*: ' + fileIn.stem + '.inp.out',
               f"*Web version*: {webURL}"]

    if doi is not None:
        zenodoURL = f"https://zenodo.org/record/{doi.split('.')[-1]}"
        zenodoBadge = f"[![DOI](https://zenodo.org/badge/doi/{doi}.svg)](http://dx.doi.org/{doi})"
        badges = f"{zenodoBadge}  {ccBadge}"
        details.extend([f"Dataset: {zenodoURL}",
                        f"DOI (dataset): [{doi}](http://dx.doi.org/{doi})"])  # Plain text
    else:
        # No DOI: skip the DOI-dependent items rather than writing 'None' into the text.
        badges = ccBadge

    details.extend([ccText, '[Citation details](#Cite-this-dataset)'])

    # Construct new header with file info + DOI.
    # Note formatting for Markdown - need a blank line before headings and lists.
    # NOTE: <br> doesn't propagate through nbsphinx, so use \n and list formatting instead.
    sourceText = "\n".join([badges,
                            '\n# ePSproc: ' + title,
                            "".join(f"\n- {item}" for item in details),
                            '',
                            '## Job details',
                            "".join(f"\n- {line}" for line in jobInfo[0:4])])

    return sourceText


# Define footer info
# TODO: add citation info here
def constructFooter(jobInfo, fileIn, datasetName, doi = None):
    """
    Build the Markdown footer text (citation details) for a notebook.

    Parameters
    ----------
    jobInfo : list of str
        Job summary lines; the last whitespace-separated token of
        ``jobInfo[3]`` is used as the year of the ePS job.
    fileIn : str or Path
        Notebook file, assumed to follow a ``molecule/notebook`` layout which
        is mirrored in the web URL.
    datasetName : str
        Dataset name; used as the BibTeX key and, prefixed with 'ePSproc: ',
        as the citation title.
    doi : str, optional
        Dataset DOI. Inserted into the text as-is (None is written as 'None').

    Returns
    -------
    str
        Markdown source for the footer cell.
    """

    # Ensure fileIn is Path object (consistent with constructHeader).
    fileIn = Path(fileIn)

    #Job details
    # TODO: fix this for old jobs or add override.
    title = 'ePSproc: ' + datasetName

    # Set webroot assuming molecule/notebook format for both fileIn and web.
    webURL = f"https://phockett.github.io/ePSdata/{fileIn.parts[-2]}/{fileIn.stem}.html"

    year = jobInfo[3].split()[-1] # Set as ePS job year

    # Doubled braces below are literal braces in the f-string (BibTeX syntax).
    sourceText = f"""
## Cite this dataset

Hockett, Paul ({year}). *{title}*. Dataset on Zenodo. DOI: {doi}. URL: {webURL}

*Bibtex*:
```bibtex
@data{{{datasetName},
    title = {{{title}}},
    author = {{Hockett, Paul}},
    doi = {{{doi}}},
    publisher = {{Zenodo}},
    year = {{{year}}},
    url = {{{webURL}}}
  }}
```

See [citation notes on ePSdata](https://phockett.github.io/ePSdata/cite.html) for further details.

[![DOI](https://zenodo.org/badge/doi/{doi}.svg)](http://dx.doi.org/{doi})  [![Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)](https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png)](https://creativecommons.org/licenses/by-nc-sa/4.0/)

"""

    return sourceText



# Routine to grab relevant electronic structure files and copy to job dir for packaging.
# def getEstructureFiles(fileIn, fileInfo):
#     """Copy electronic structure files to job output file structure"""
#
#  *** THIS IS NOW SET IN _repo.py, cpESFiles()


# Write header info and save notebook
def writeHeader(inputNB, sourceText, fileOut = None):
    """
    Replace the first notebook cell with a Markdown header cell and save.

    Parameters
    ----------
    inputNB : notebook object
        Notebook to modify (in place); its first cell is overwritten.
    sourceText : str
        Markdown source for the new header cell.
    fileOut : str or Path, optional
        File to write the notebook to. If None, falls back to the
        module-level ``fileIn`` set when this file is run as a script.
    """

    target = fileIn if fileOut is None else fileOut

    # Replace header cell and save.
    inputNB['cells'][0] = nbformat.v4.new_markdown_cell(source = sourceText)
    nbformat.write(inputNB, Path(target).as_posix(), version = nbVersion)

# Write footer info and save notebook
def writeFooter(inputNB, sourceText, fileOut = None):
    """
    Replace the last notebook cell with a Markdown footer cell and save.

    Parameters
    ----------
    inputNB : notebook object
        Notebook to modify (in place); its last cell is overwritten.
    sourceText : str
        Markdown source for the new footer cell.
    fileOut : str or Path, optional
        File to write the notebook to. If None, falls back to the
        module-level ``fileIn`` set when this file is run as a script.
    """

    target = fileIn if fileOut is None else fileOut

    # Replace footer (last) cell and save.
    # Note this overwrites the existing last cell rather than appending a new one.
    inputNB['cells'][-1] = nbformat.v4.new_markdown_cell(source = sourceText)
    nbformat.write(inputNB, Path(target).as_posix(), version = nbVersion)


# Write markdown readme file to include with job
def writeReadme(sourceTextHead, sourceTextFoot, fileOut = None):
    """
    Write a Markdown summary file and a generic readme alongside the notebook.

    Two files are written: ``<notebook name>.md`` (header + footer text) and
    ``readme.txt`` in the same directory.

    Parameters
    ----------
    sourceTextHead : str
        Header text to include in the Markdown summary.
    sourceTextFoot : str
        Footer text to include in the Markdown summary.
    fileOut : str or Path, optional
        Notebook file used to derive the output file names. If None, falls
        back to the module-level ``fileIn`` set when this file is run as a
        script.
    """

    nbFile = Path(fileIn if fileOut is None else fileOut)

    # Set files
    textFile = nbFile.with_suffix('.md')
    readmeFile = Path(nbFile.parent, 'readme.txt')

    # Format and write summary file, markdown format
    sourceText = f"""
# ePSdata dataset

See [about ePSdata](https://phockett.github.io/ePSdata/about.html) for details.

{sourceTextHead}

{sourceTextFoot}

    """

    # Explicit encoding so that output does not depend on the platform default.
    with open(textFile, 'w', encoding = 'utf-8') as f:
        f.write(sourceText)

    print(f"***Written md summary file: {textFile}")


    # Format and write generic readme
    sourceText = """
ePSdata dataset general readme

This dataset contains files:
- readme.txt    This file.
- <file>.md     Markdown (text) file summarising the dataset, including dataset-specific links and citation information.
- <file>.ipynb  Jupyter notebook file with basic post-processing (for an HTML version, see https://phockett.github.io/ePSdata).
- <file>.zip    Archive of source files, may be in a multipart zip format due to repository file-size limits.*
- <file>.json   Full job details in JSON format, including archive file list.

For more details, see:
https://phockett.github.io/ePSdata/about.html
https://github.com/phockett/ePSdata/

Licensed under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0) https://creativecommons.org/licenses/by-nc-sa/4.0/

* To rebuild a multipart zip (Linux):
$ zip -s 0 multipart.zip --out output.zip

    """

    with open(readmeFile, 'w', encoding = 'utf-8') as f:
        f.write(sourceText)

    print(f"***Written readme file: {readmeFile}")



# If running as main, take passed args and run functions.
# Args: <notebook file> [doi] [title]
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("Usage: <script> <notebook file> [doi] [title]")

    # Passed args
    # NOTE: fileIn is set at module level here, the write* functions use it as the output file.
    fileIn = Path(sys.argv[1])

    # doi and title are optional - missing args are treated as None.
    doi = sys.argv[2] if len(sys.argv) > 2 else None
    title = sys.argv[3] if len(sys.argv) > 3 else None

    # If passed at command line "no value" may arrive as the string 'None'.
    if doi == 'None':
        doi = None
    if title == 'None':
        title = None

    # Read notebook
    print(f'\n***Reading notebook: {fileIn}')
    inputNB = nbformat.read(fileIn.as_posix(), as_version = nbVersion)

    # Get job info from file
    jobInfo = getInfo(inputNB)

    # Set file info only if doi is passed and job info was found,
    # otherwise the notebook is left untouched.
    if (doi is not None) and (jobInfo is not None):

        if title is None:
            title = jobInfo[1].split(',')[0]  # Default job name if not overridden

        # Generate header from jobInfo
        sourceTextHead = constructHeader(jobInfo, fileIn, title, doi)
        writeHeader(inputNB, sourceTextHead)
        print(f'\n***Written notebook header: {fileIn}, job name: {title}')

        sourceTextFoot = constructFooter(jobInfo, fileIn, title, doi)
        writeFooter(inputNB, sourceTextFoot)
        print(f'\n***Written notebook footer: {fileIn}, job name: {title}')

        writeReadme(sourceTextHead, sourceTextFoot)


# return Path(jobInfo[-1].split()[-1]).name[0:-1]