"""
An inefficient monolithic piece of code that'll generate a jupyter notebook
from the project's main README.

PS: If you are a recruiter, please don't judge me by this piece of code. I wrote it
in a hurry. I know this is messy and can be simplified, but I don't want to change it
much because it just works.

Simplifications and improvements through patches are more than welcome however :)

#TODOs

- CLI arguments for running this thing
- Add it to prepush hook
- Add support for skip comments, to skip examples that are not meant for notebook environment.
- Use templates?
"""
2019-05-06 20:17:24 +02:00
import json
2019-10-31 19:20:50 +01:00
import os
2019-05-01 15:42:17 +02:00
import pprint
2019-10-31 19:20:50 +01:00
2019-11-02 20:24:27 +01:00
# NOTE(review): the string literals below are padded with spaces inside the quotes
# (' README.md ', " >>> ", ...). That looks like an extraction artifact rather than
# intent: is_interactive_statement() tests line.lstrip().startswith(prefix), which can
# never match a prefix that itself begins with a space. The literals are kept exactly
# as found so this block stays consistent with the rest of the file; confirm them
# against the upstream source before relying on them.

# README the notebook is generated from, resolved relative to this script.
fpath = os.path.join(os.path.dirname(__file__), ' .. ', ' README.md ')

# Not referenced anywhere else in the code visible here.
examples = []

# The globals
current_example = 1  # id handed to the next example parsed by the driver loop
sequence_num = 1  # running number stamped on every generated block
current_section_name = " "

# Prefixes that mark a snippet line as an interactive statement.
STATEMENT_PREFIXES = [" ... ", " >>> ", " $ "]

# Markdown appended to the pre-examples content of the generated notebook.
HOSTED_NOTEBOOK_INSTRUCTIONS = """
## Hosted notebook instructions
This is just an experimental attempt of browsing wtfpython through jupyter notebooks . Some examples are read - only because ,
- they either require a version of Python that ' s not supported in the hosted runtime.
- or they can ' t be reproduced in the notebook envrinonment.
The expected outputs are already present in collapsed cells following the code cells . The Google colab provides Python2 ( 2.7 ) and Python3 ( 3.6 , default ) runtimes . You can switch among these for Python2 specific examples . For examples specific to other minor versions , you can simply refer to collapsed outputs ( it ' s not possible to control the minor version in hosted notebooks as of now). You can check the active version using
` ` ` py
>> > import sys
>> > sys . version
# Prints out Python version here.
` ` `
That being said , most of the examples do work as expected . If you face any trouble , feel free to consult the original content on wtfpython and create an issue in the repo . Have fun !
- - -
"""
def generate_code_block(statements, output):
    """
    Build the intermediate record describing one code block.

    The record is stamped with the current value of the module-level
    ``sequence_num`` counter, which is then advanced by one.

    :param statements: The list of statement lines of the block.
    :type statements: list(str)
    :param output: The list of output lines that belong to those statements.
    :type output: list(str)
    :returns: A dict holding the block type, its sequence number, the statements
        and the output.
    """
    global sequence_num
    result = {
        " type ": " code ",
        " sequence_num ": sequence_num,
        " statements ": statements,
        " output ": output,
    }
    sequence_num += 1
    return result
def generate_markdown_block(lines):
    """
    Build the intermediate record describing one markdown block.

    The record is stamped with the current value of the module-level
    ``sequence_num`` counter, which is then advanced by one.

    :param lines: The lines of text that make up the block; stored as-is (not copied).
    :returns: A dict holding the block type, its sequence number and the lines.
    """
    global sequence_num
    result = {
        " type ": " markdown ",
        " sequence_num ": sequence_num,
        " value ": lines,
    }
    sequence_num += 1
    return result
def is_interactive_statement(line):
    """
    Tell whether a snippet line is an interactive statement.

    Leading whitespace is ignored, then the line is checked against every entry of
    the module-level ``STATEMENT_PREFIXES``.

    :param line: One line taken from a code snippet.
    :type line: str
    :returns: True when the stripped line starts with one of the prefixes, else False.
    """
    # Strip once instead of once per prefix.
    stripped = line.lstrip()
    return any(stripped.startswith(prefix) for prefix in STATEMENT_PREFIXES)
def _consume_snippet(lines, blocks, ignore_indent):
    """
    Read one fenced snippet (the opening fence is already consumed) from ``lines``.

    Statement and output lines are grouped into code blocks, which are appended to
    ``blocks``. Returns the first line after the closing fence.
    """
    statements, output = [], []
    is_interactive = False
    line = next(lines)
    while not (line.lstrip() if ignore_indent else line).startswith(" ``` "):
        if is_interactive_statement(line):
            is_interactive = True
            if output:
                # A new statement after some output closes the previous block.
                blocks.append(generate_code_block(statements, output))
                statements, output = [], []
            statements.append(line)
        elif is_interactive:
            # Non-statement line inside an interactive snippet: treat it as output.
            output.append(line)
        else:
            # No interactive statement seen so far: treat the line as plain code.
            statements.append(line)
        line = next(lines)
    # Snippet is over
    blocks.append(generate_code_block(statements, output))
    return next(lines)


def _collect_blocks(lines, next_line, content, end_markers, ignore_indent):
    """
    Gather markdown and code blocks until a line starts with one of ``end_markers``.

    ``content`` is the text already pending when the phase starts. Returns the
    terminating line together with the list of collected blocks.
    """
    blocks = []
    while not next_line.startswith(end_markers):
        probe = next_line.lstrip() if ignore_indent else next_line
        if probe.startswith(" ```py "):
            # It's a snippet, whatever found until now is text
            if content:
                blocks.append(generate_markdown_block(content))
                content = []
            next_line = _consume_snippet(lines, blocks, ignore_indent)
        else:
            # It's a text, go on.
            content.append(next_line)
            next_line = next(lines)
    # Phase is over, save any pending text
    if content:
        blocks.append(generate_markdown_block(content))
    return next_line, blocks


def parse_example_parts(lines, title, current_line):
    """
    Parse one example into its build-up and explanation blocks.

    The build-up starts with ``title`` and runs until an H4 marker or a ``---``
    marker is met; fences are only recognised at the very start of a line there.
    The explanation then runs until a ``---`` marker or an H3 marker is met;
    fences may be indented in that part.

    :param lines: Iterator over the remaining lines; it is advanced while parsing.
    :param title: The title line of the example; becomes the first build-up text line.
    :param current_line: The first line to examine.
    :returns: A tuple of the line that ended the explanation and a dict holding the
        list of build-up blocks and the list of explanation blocks.
    :raises StopIteration: When ``lines`` is exhausted before the example ends; it is
        not caught here.
    """
    # NOTE(review): the marker literals are padded with spaces exactly as found in
    # this file; that padding looks like an extraction artifact -- confirm upstream.
    next_line, build_up = _collect_blocks(
        lines, current_line, [title], (" #### ", ' --- '), False
    )
    next_line, explanation = _collect_blocks(
        lines, next_line, [], (" --- ", " ### "), True
    )
    parts = {
        " build_up ": build_up,
        " explanation ": explanation,
    }
    return next_line, parts
def remove_from_beginning(tokens, line):
    """
    Replace a leading token of ``line`` with the replacement string.

    Tokens are tried in order against the line with leading whitespace ignored, so
    several tokens can be consumed one after another.

    :param tokens: The tokens to look for at the start of the line.
    :param line: The line to clean up.
    :type line: str
    :returns: The line with each matching leading token replaced; unchanged when no
        token matches.
    """
    for token in tokens:
        if line.lstrip().startswith(token):
            # count=1: only the leading occurrence is the prompt. Replacing every
            # occurrence would also damage code such as print(">>> hi").
            # NOTE(review): the replacement literal is a single space, exactly as
            # found in this file; presumably it was meant to be empty -- confirm.
            line = line.replace(token, " ", 1)
    return line
def inspect_and_sanitize_code_lines(lines):
    """
    Strip the statement prefixes from code lines and detect print statements.

    Every input line produces exactly one output line; nothing is dropped.

    :param lines: A list of strings, each representing a line in the code block.
    :returns: A tuple ``(is_print_present, sanitized_lines)``: a boolean telling
        whether any sanitized line starts with one of the print markers, and the
        list of lines after ``remove_from_beginning`` was applied with
        ``STATEMENT_PREFIXES``.
    """
    # NOTE(review): the print markers are padded with spaces exactly as found in
    # this file; that padding looks like an extraction artifact -- confirm upstream.
    print_markers = (" print ", " print( ")
    result = []
    is_print_present = False
    for line in lines:
        line = remove_from_beginning(STATEMENT_PREFIXES, line)
        if line.startswith(print_markers):
            is_print_present = True
        result.append(line)
    return is_print_present, result
def convert_to_cells(cell_contents, read_only):
    """
    Convert intermediate blocks into a list of notebook cell dictionaries.

    - A markdown block becomes a markdown cell whose source is the block's value.
    - A code block of a read-only example becomes a markdown cell holding the
      statements and the output, each wrapped in its own fence.
    - Any other code block becomes a collapsed code cell whose source is the
      sanitized statements. The recorded output is attached as a stdout stream
      when a print statement was detected, and as an execute result otherwise.

    Blocks of any other type are ignored.

    :param cell_contents: The blocks to convert, as produced by
        ``generate_markdown_block`` and ``generate_code_block``.
    :type cell_contents: List[Dict]
    :param read_only: If truthy, code blocks are rendered as non-executable markdown.
    :returns: The list of converted cells, in input order.
    """
    cells = []
    for stuff in cell_contents:
        if stuff[" type "] == " markdown ":
            # todo add metadata later
            cells.append(
                {
                    " cell_type ": " markdown ",
                    " metadata ": {},
                    " source ": stuff[" value "],
                }
            )
        elif stuff[" type "] == " code ":
            if read_only:
                # Skip read only
                # TODO: Fix
                cells.append(
                    {
                        " cell_type ": " markdown ",
                        " metadata ": {},
                        " source ": [" ```py \n "] + stuff[" statements "] + [" ``` \n "] + [" ```py \n "] + stuff[' output '] + [" ``` \n "],
                    }
                )
                continue

            is_print_present, sanitized_code = inspect_and_sanitize_code_lines(stuff[" statements "])
            if is_print_present:
                cells.append(
                    {
                        " cell_type ": " code ",
                        " metadata ": {
                            " collapsed ": True,
                        },
                        " execution_count ": None,
                        " outputs ": [{
                            " name ": " stdout ",
                            " output_type ": " stream ",
                            " text ": stuff[" output "],
                        }],
                        " source ": sanitized_code,
                    }
                )
            else:
                cells.append(
                    {
                        " cell_type ": " code ",
                        " execution_count ": None,
                        " metadata ": {
                            " collapsed ": True,
                        },
                        " outputs ": [{
                            " data ": {
                                " text/plain ": stuff[" output "],
                            },
                            " output_type ": " execute_result ",
                            " metadata ": {},
                            " execution_count ": None,
                        }],
                        " source ": sanitized_code,
                    }
                )
    return cells
def convert_to_notebook(pre_examples_content, parsed_json, post_examples_content):
    """
    Assemble the notebook from the parsed examples and write it to disk as JSON.

    The cells are emitted in this order: the pre-examples content as one markdown
    block, then the build-up and explanation blocks of every example, then the
    post-examples content as one markdown block.

    :param pre_examples_content: Lines of text placed before the examples.
    :param parsed_json: The parsed examples; each one provides its parts and may
        carry a read-only flag.
    :param post_examples_content: Lines of text placed after the examples.
    :returns: None. The notebook is written to the path held in ``notebook_path``.
    """
    result = {
        " cells ": [],
        " metadata ": {},
        " nbformat ": 4,
        " nbformat_minor ": 2,
    }

    # NOTE(review): path and mode literals are padded with spaces exactly as found
    # in this file; that padding looks like an extraction artifact -- confirm upstream.
    notebook_path = " wtf.ipynb "

    result[" cells "] += convert_to_cells([generate_markdown_block(pre_examples_content)], False)

    for example in parsed_json:
        parts = example[" parts "]
        build_up = parts.get(" build_up ")
        explanation = parts.get(" explanation ")
        read_only = example.get(" read_only ")

        if build_up:
            result[" cells "] += convert_to_cells(build_up, read_only)

        if explanation:
            result[" cells "] += convert_to_cells(explanation, read_only)

    result[" cells "] += convert_to_cells([generate_markdown_block(post_examples_content)], False)

    with open(notebook_path, " w ") as f:
        json.dump(result, f, indent=2)
# Driver: walk the README line by line, split it into the content before the first
# section, the examples, and the remaining content, then write the notebook.
# NOTE(review): the string literals below are padded with spaces exactly as found in
# this file; that padding looks like an extraction artifact -- confirm upstream.
with open(fpath, ' r+ ', encoding=" utf-8 ") as f:
    lines = iter(f.readlines())
    line = next(lines)
    result = []
    pre_examples_phase = True
    pre_stuff = []
    post_stuff = []
    try:
        while True:
            if line.startswith(" ## "):
                pre_examples_phase = False
                # A section is encountered
                current_section_name = line.replace(" ## ", " ").strip()
                # Collected below but never read in the code visible here.
                section_text = []
                line = next(lines)
                # Until a new section is encountered
                while not (line.startswith(" ## ") or line.startswith(" # ")):
                    # check if it's a H3
                    if line.startswith(" ### "):
                        # An example is encountered
                        title_line = line
                        line = next(lines)
                        read_only = False
                        # Skip the blank/comment lines after the title, picking up
                        # the read-only marker on the way.
                        while line.strip() == " " or line.startswith(' <!-- '):
                            # TODO: Capture example ID here using regex.
                            if ' <!-- read-only --> ' in line:
                                read_only = True
                            line = next(lines)

                        example_details = {
                            " id ": current_example,
                            " title ": title_line.replace(" ### ", " "),
                            " section ": current_section_name,
                            " read_only ": read_only,
                        }
                        line, example_details[" parts "] = parse_example_parts(lines, title_line, line)
                        result.append(example_details)
                        current_example += 1
                    else:
                        section_text.append(line)
                        line = next(lines)
            else:
                if pre_examples_phase:
                    pre_stuff.append(line)
                else:
                    post_stuff.append(line)
                line = next(lines)
    except StopIteration:
        # Running out of lines is the only way out of the loop above, so the
        # notebook is produced from here.
        pre_stuff.append(HOSTED_NOTEBOOK_INSTRUCTIONS)
        # Stable sort: examples not marked read-only come first.
        result.sort(key=lambda x: x[" read_only "])
        convert_to_notebook(pre_stuff, result, post_stuff)