From efdac8f9caf82336d6c13686a7a9c393fa3ccbab Mon Sep 17 00:00:00 2001 From: Satwik Kansal Date: Mon, 6 May 2019 23:47:24 +0530 Subject: [PATCH] Get the generator in a working state --- irrelevant/json_generator.py | 239 +++++++++++++++++++++++++---------- 1 file changed, 175 insertions(+), 64 deletions(-) diff --git a/irrelevant/json_generator.py b/irrelevant/json_generator.py index 86820f1..ea75cb5 100644 --- a/irrelevant/json_generator.py +++ b/irrelevant/json_generator.py @@ -1,52 +1,85 @@ +import json import pprint fname = "/Users/300041709/code/self/wtfpython/README.md" examples = [] # The globals current_example = 1 +sequence_num = 1 current_section_name = "" -def parse_example_parts(lines): +STATEMENT_PREFIXES = ["...", ">>> ", "$ "] + + +def generate_code_block(statements, output): + global sequence_num + result = { + "type": "code", + "sequence_num": sequence_num, + "statements": statements, + "output": output + } + sequence_num += 1 + return result + + +def generate_markdown_block(lines): + global sequence_num + result = { + "type": "markdown", + "sequence_num": sequence_num, + "value": lines + } + sequence_num += 1 + return result + +def is_interactive_statement(line): + for prefix in STATEMENT_PREFIXES: + if line.startswith(prefix): + return True + return False + +def parse_example_parts(lines, example_title_line): parts = { "build_up": [], "explanation": [] } - next_line = next(lines) - sequence_num = 1 content = [] + statements_so_far = [] + output_so_far = [] + next_line = example_title_line # store build_up till an H4 (explanation) is encountered while not next_line.startswith("#### "): # Watching out for the snippets if next_line.startswith("```"): # It's a snippet, whatever found until now is text + is_interactive = False if content: - parts["build_up"].append( - { - "type": "text", - "sequence_num": sequence_num, - "value": content - } - ) - sequence_num += 1 + parts["build_up"].append(generate_markdown_block(content)) content = [] next_line = next(lines) + while not next_line.startswith("```"): - content.append(next_line) + if is_interactive_statement(next_line): + is_interactive = True + if (output_so_far): + parts["build_up"].append(generate_code_block(statements_so_far, output_so_far)) + statements_so_far, output_so_far = [], [] + statements_so_far.append(next_line) + else: + # can be either output or normal code + if is_interactive: + output_so_far.append(next_line) + else: + statements_so_far.append(next_line) next_line = next(lines) + # Snippet is over - parts["build_up"].append( - { - "type": "code", - "sequence_num": sequence_num, - "value": content - } - ) - sequence_num += 1 - content = [] + parts["build_up"].append(generate_code_block(statements_so_far, output_so_far)) + statements_so_far, output_so_far = [], [] next_line = next(lines) - continue else: # It's a text, go on. content.append(next_line) @@ -54,51 +87,43 @@ def parse_example_parts(lines): # Explanation encountered, save any content till now (if any) if content: - parts["build_up"].append( - { - "type": "text", - "sequence_num": sequence_num, - "value": content - } - ) + parts["build_up"].append(generate_markdown_block(content)) # Reset stuff - sequence_num = 1 content = [] + statements_so_far, output_so_far = [], [] # store lines again until --- or another H3 is encountered while not (next_line.startswith("---") or next_line.startswith("### ")): - if next_line.startswith("```"): # It's a snippet, whatever found until now is text + is_interactive = False if content: - parts["explanation"].append( - { - "type": "text", - "sequence_num": sequence_num, - "value": content - } - ) - sequence_num += 1 + parts["build_up"].append(generate_markdown_block(content)) content = [] next_line = next(lines) + while not next_line.startswith("```"): - content.append(next_line) + if is_interactive_statement(next_line): + is_interactive = True + if (output_so_far): + parts["build_up"].append(generate_code_block(statements_so_far, output_so_far)) + statements_so_far, output_so_far = [], [] + statements_so_far.append(next_line) + else: + # can be either output or normal code + if is_interactive: + output_so_far.append(next_line) + else: + statements_so_far.append(next_line) next_line = next(lines) + # Snippet is over - parts["explanation"].append( - { - "type": "code", - "sequence_num": sequence_num, - "value": content - } - ) - sequence_num += 1 - content = [] + parts["build_up"].append(generate_code_block(statements_so_far, output_so_far)) + statements_so_far, output_so_far = [], [] next_line = next(lines) - continue else: # It's a text, go on. content.append(next_line) @@ -106,16 +131,105 @@ def parse_example_parts(lines): # All done if content: - parts["explanation"].append( - { - "type": "text", - "sequence_num": sequence_num, - "value": content - } - ) + parts["explanation"].append(generate_markdown_block(content)) return next_line, parts +def remove_from_beginning(tokens, line): + for token in tokens: + if line.startswith(token): + line = line.replace(token, "") + return line + + +def inspect_and_sanitize_code_lines(lines): + tokens_to_remove = STATEMENT_PREFIXES + result = [] + is_print_present = False + for line in lines: + line = remove_from_beginning(tokens_to_remove, line) + if line.startswith("print ") or line.startswith("print("): + is_print_present = True + result.append(line) + return is_print_present, result + +def convert_to_cells(cell_contents): + cells = [] + for stuff in cell_contents: + if stuff["type"] == "markdown": + # todo add metadata later + cells.append( + { + "cell_type": "markdown", + "metadata": {}, + "source": stuff["value"] + } + ) + elif stuff["type"] == "code": + is_print_present, sanitized_code = inspect_and_sanitize_code_lines(stuff["statements"]) + if is_print_present: + cells.append( + { + "cell_type": "code", + "metadata": { + "collapsed": True + }, + "execution_count": None, + "outputs": [{ + "name": "stdout", + "output_type": "stream", + "text": stuff["output"] + }], + "source": sanitized_code + } + ) + else: + cells.append( + { + "cell_type": "code", + "execution_count": None, + "metadata": { + "collapsed": True + }, + "outputs": [{ + "data": { + "text/plain": stuff["output"] + }, + "output_type": "execute_result", + "metadata": {}, + "execution_count": None + }], + "source": sanitized_code + } + ) + + return cells + + +def convert_to_notebook(parsed_json): + result = { + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 + } + for example in parsed_json: + parts = example["parts"] + build_up = parts.get("build_up") + explanation = parts.get("explanation") + notebook_path = "test.ipynb" + + if(build_up): + result["cells"] += convert_to_cells(build_up) + + if(explanation): + result["cells"] += convert_to_cells(explanation) + + pprint.pprint(result, indent=2) + with open(notebook_path, "w") as f: + json.dump(result, f) + + with open(fname, 'r+', encoding="utf-8") as f: lines = iter(f.readlines()) @@ -126,6 +240,7 @@ with open(fname, 'r+', encoding="utf-8") as f: if line.startswith("## "): # A section is encountered current_section_name = line.replace("## ", "").strip() + section_text = [] line = next(lines) # Until a new section is encountered while not (line.startswith("## " )): @@ -138,19 +253,15 @@ with open(fname, 'r+', encoding="utf-8") as f: "title": line.replace("### ", ""), "section": current_section_name } - line, example_details["parts"] = parse_example_parts(lines) + line, example_details["parts"] = parse_example_parts(lines, line) result.append(example_details) current_example += 1 else: - # todo catch section text + section_text.append(line) line = next(lines) else: line = next(lines) except StopIteration: pprint.pprint(result, indent=2) - print(len(result)) - - - - + convert_to_notebook(result)