def parse_md_file_to_react(path, target_dir, file, is_folder_readme=False, is_course_readme=False):
    """Convert the text of one markdown article into React-friendly markup plus metadata.

    The text is processed in three stages: header extraction (dates, tags,
    title), markdown pre-processing (spacing fixes, brace escaping, copy
    buttons, proof flags), and post-processing of the HTML produced by
    ``markdown`` (brace/whitespace escaping, ``<Latex>`` wrapping, table of
    contents, ``className`` rename). The order of the steps matters, because
    later regexes rely on the output of earlier ones.

    Args:
        path: Path of the source markdown file. Used in warnings, for the
            article id (after stripping ``ROOT_DIR``) and for the ``dir`` list.
        target_dir: Output location; its last ``/``-separated component becomes
            the article name, and it is passed on to ``add_link_anchors``.
        file: The markdown text itself (a string, not a file object).
        is_folder_readme: True when the text is a folder readme.
        is_course_readme: True when the text is a course readme.

    Returns:
        A 5-tuple ``(page, article_data, page_title,
        copiable_article_plaintext, table_of_contents)`` where

        * ``page`` is the generated markup string,
        * ``article_data`` is a dict with the keys ``type``, ``coming_soon``,
          ``mod_timestamp``, ``mod_date_time``, ``cr_timestamp``,
          ``cr_date_time``, ``tags``, ``title``, ``content``, ``id``, ``dir``
          and ``name``,
        * ``page_title`` is the article name title-cased with hyphens turned
          into spaces,
        * ``copiable_article_plaintext`` is an escaped plaintext version of the
          content, or ``None`` when ``COURSE_INDICATOR`` occurs in ``path``,
        * ``table_of_contents`` is a list of ``[heading_html, '#' + id]`` pairs,
          one per ``<h2>`` in the page.
    """
    article_data = {}
    is_readme = is_folder_readme or is_course_readme

    # set article type (article, folder readme, course readme);
    # course takes precedence over folder when both flags are set
    article_data['type'] = 'article'
    if is_folder_readme:
        article_data['type'] = 'folder'
    if is_course_readme:
        article_data['type'] = 'course'

    # extract article date from first line
    article_data['coming_soon'] = False
    if not is_readme:  # don't extract date from readmes
        try:
            first, second, rest = file.split('\n', 2)
            if second.startswith('created '):
                # line 1 = modification date, line 2 = "created <creation date>"
                mod_date = datetime.strptime(first, '%d/%m/%Y %H:%M').timestamp()
                article_data['mod_timestamp'] = mod_date
                article_data['mod_date_time'] = timestamp_to_str(mod_date)
                cr_date = datetime.strptime(second.split(' ', 1)[1], '%d/%m/%Y %H:%M').timestamp()
                article_data['cr_timestamp'] = cr_date
                article_data['cr_date_time'] = timestamp_to_str(cr_date)
                file = rest
            else:  # no creation date supplied, reuse the modification date
                mod_date = datetime.strptime(first, '%d/%m/%Y %H:%M').timestamp()
                article_data['mod_timestamp'] = mod_date
                article_data['mod_date_time'] = timestamp_to_str(mod_date)
                article_data['cr_timestamp'] = mod_date
                article_data['cr_date_time'] = timestamp_to_str(mod_date)
                file = second + '\n' + rest

        except ValueError:
            # reached when a date fails to parse, and also when the text has
            # fewer than three lines (the 3-way unpack above fails); the text
            # is left untouched in both cases
            warn(f'no date found on first line of {path}, marking as "coming soon"')
            article_data['mod_timestamp'] = PLACEHOLDER_TIMESTAMP
            article_data['mod_date_time'] = ''
            article_data['cr_timestamp'] = PLACEHOLDER_TIMESTAMP
            article_data['cr_date_time'] = ''
            article_data['coming_soon'] = True
    else:
        article_data['mod_timestamp'] = PLACEHOLDER_TIMESTAMP
        article_data['mod_date_time'] = ''
        article_data['cr_timestamp'] = PLACEHOLDER_TIMESTAMP
        article_data['cr_date_time'] = ''

    # extract article tags, if they exist
    # partition (rather than a 2-way split/unpack) so that text without any
    # newline - e.g. a one-line body left after the date header - does not raise
    first, _, rest = file.partition('\n')
    article_data['tags'] = []
    if first.startswith('[TAGS]'):
        for tag in first.removeprefix('[TAGS]').split(','):
            split_tag = tag.strip().split()
            if len(split_tag) == 2:  # expected form: "<name> <colour>"
                article_data['tags'].append({'name': split_tag[0], 'colour': split_tag[1]})
            else:
                warn(f'something wrong with the tags specified in {path}, skipping tag extraction')
        file = rest

    # identify article title
    titles = re.findall(r'```.*?\n# .*?\n```|\n# (.*?)\n', '\n'+file, re.DOTALL) # identify lines starting with '# ' that aren't inside a code block (might be first line, so prepend \n)
    titles = [t for t in titles if t != ''] # for invalid titles, the capture group is empty but still exists, so need to remove them
    if len(titles) == 0:
        article_data['title'] = 'no_title'
        warn(f'no article title found in {path}')
    else:
        article_data['title'] = titles[0]
        if len(titles) > 1:
            warn(f'multiple article titles found in {path}, using first one')

    # replace \\ with \\\\, because for some reason later \\ is replaced with \ (probably by markdown2)
    file = file.replace('\\\\','\\\\\\\\')
    # ensure displayed latex is preceded+followed by two newlines (only for lines that start with $$, so we can still have e.g. "> $$asdf$$"), so that markdown2 will wrap it in <p>, so that it gets registered as not the first child in theorems/props etc.
    file = re.sub(r'(\n\$\$.*?\$\$)', r'\n\n\1\n\n', file, flags=re.DOTALL)
    # ensure that <Spoiler> and </Spoiler> are preceded+followed by two newlines, so that markdown2 will wrap them in p tags (i.e. there won't be junk between spoiler tag and p tag)
    file = re.sub(r'<(/?)Spoiler(/?)>\n?([^\n])', r'<\1Spoiler\2>\n\n\3', file)
    file = re.sub(r'([^\n])\n?<(/?)Spoiler(/?)>', r'\1\n\n<\2Spoiler\3>', file)
    # do the same for <hr> and </hr>
    file = re.sub(r'<(/?)hr(/?)>\n?([^\n])', r'<\1hr\2>\n\n\3', file)
    file = re.sub(r'([^\n])\n?<(/?)hr(/?)>', r'\1\n\n<\2hr\3>', file)
    # do the same for math tags
    for tag in math_tags:
        file = re.sub(r'<(/?)' + tag + r'([^>]*?)>\n?([^\n])', r'<\1' + tag + r'\2>\n\n\3', file)
        file = re.sub(r'([^\n])\n?<(/?)' + tag + r'([^>]*?)>', r'\1\n\n<\2' + tag + r'\3>', file)
    # find literal braces, for latex (so that the backslash doesn't die when being parsed)
    file = re.sub('\\\\{', '\\&#123;', file)
    file = re.sub('\\\\}', '\\&#125;', file)
    # in copiable code blocks, add copy buttons
    for m in [*re.finditer(r'__COPIABLE__\n```(.*?)\n(.*?)\n```', file, re.DOTALL)][::-1]: # reverse so can edit the string without indices changing
        lang, code = m[1], m[2]
        copiable = code.replace('\\n', '\\\\n').replace('\n','\\n') # copy button component takes newlines as literals
        copiable = copiable.replace('"', '&quot;')
        modified = '__COPIABLE__\n\n<CopyButton text="' + copiable + '"/>\n\n```' + lang + '\n' + code + '\n```'
        file = file[:m.span()[0]] + modified + file[m.span()[1]:]
    # set proofs inside theorem blocks to be unquoted and unbolded
    for tag in ['Thm', 'Lemma']:
        for m in [*re.finditer(r'(<'+tag+r'[^>]*?>)(.*?)(</'+tag+r'>)', file, re.DOTALL)][::-1]: # reverse so can edit the string without indices changing
            modified = re.sub(r'(<Proof[^>]*?)(>.*?)(</Proof>)', r'\1 unquoted unbolded\2\3', m[2], flags=re.DOTALL)
            file = file[:m.span()[0]] + m[1] + modified + m[3] + file[m.span()[1]:]

    page = markdown(file, extras=['fenced-code-blocks', 'codehilite', 'code-friendly', 'header-ids', 'footnotes', 'wiki-tables'])

    # replace curly braces with html character codes, so that react ignores them
    page = re.sub('{', '&#123;', page)
    page = re.sub('}', '&#125;', page)

    # pre doesn't work in nextjs, so find all whitespace inside pre tags and replace with character codes
    for m in [i.span() for i in re.finditer(r'(?<=<pre>)(.*?)(?=</pre>)', page, re.DOTALL)][::-1]: # reverse so can edit the string without indices changing
        target = page[m[0]:m[1]]
        target = re.sub('span ', 'span#', target) # temp: protect the space after "span" (before its attributes) from the replacement below
        target = re.sub(' ', '&nbsp;', target) # replace whitespace
        target = re.sub('span#', 'span ', target)
        target = re.sub('\n', '<br/>', target) # replace newlines
        page = page[:m[0]] + target + page[m[1]:]

    # add <Latex> tags
    # for blocks inside blockquotes, the background is not white (so add scrollshadow-horizontal-blockquote instead of scrollshadow-horizontal)
    page = re.sub(r'<p>\$\$(.*?)\$\$</p>', r'<Latex>\1</Latex>', page) # remove <p> tags that markdown added
    page = re.sub(r'\$\$(.*?)\$\$', r'<Latex>\1</Latex>', page) # temporarily remove double dollar signs, so that can deal with single dollar signs first
    page = re.sub(r'\$(.+?)\$', r'<Latex>$\1$</Latex>', page)
    # need block span instead of div so that doesn't trigger hydration error
    for tag in [('Thm',''), ('Lemma',''), ('Proof',''), ('Defn',''), ('Example','quoted'), ('blockquote',''), ('Spoiler', '')]:
        tag,quoted = tag
        for m in [i.span() for i in re.finditer(r'<'+tag+r'[^>]*?'+quoted+r'.*?>.*?</'+tag+r'>', page, re.DOTALL)][::-1]: # reverse so can edit the string without indices changing
            target = page[m[0]:m[1]]
            # unquoted blocks aren't grey, so skip
            if 'unquoted' in re.findall(r'<.*?>',target)[0]: continue
            # replace $$ (display latex is the <Latex> content that does not start/end with a single $)
            target = re.sub(r'<Latex>([^$].*?[^$])</Latex>', r'<span className="scrollshadow-horizontal-blockquote latex-display-wrapper"><Latex>$$\1$$</Latex></span>', target)
            page = page[:m[0]] + target + page[m[1]:]
    # remaining display latex (outside the grey blocks handled above) gets the plain scrollshadow wrapper
    page = re.sub(r'<Latex>([^$].*?[^$])</Latex>', r'<span className="scrollshadow-horizontal latex-display-wrapper"><Latex>$$\1$$</Latex></span>', page)

    # <p> tags will have been placed around the following tags (on purpose), remove them
    for i in ['CopyButton', 'Spoiler', 'hr'] + math_tags:
        page = re.sub(r'<p>(</?' + i + r'.*?>)</p>', r'\1', page)

    # find h2 tags, add link anchor to them, and generate table of contents from h2 tags (each h2 tag is given a unique id by the header-ids extension)
    # the table of contents must be collected before add_link_anchors rewrites the page
    table_of_contents = [[i.group(2),'#'+i.group(1)] for i in re.finditer(r'<h2 id="(.*?)">(.*?)</h2>', page, re.DOTALL)]
    page = add_link_anchors(page, target_dir)

    # markdown added class attributes, replace with "className" for react
    page = page.replace('class=','className=')

    # move copy buttons generated above (i.e. in code blocks marked __COPIABLE__) into their containers
    page = re.sub(r'<p>__COPIABLE__</p>\n\n<CopyButton(.*?)/>\n\n<div className="codehilite">\n<pre>(.*?)</pre>\n</div>', r'<div className="codehilite relative">\n<div className="absolute top-2 right-2"><CopyButton\1/></div>\n<pre>\2</pre>\n</div>', page)
    page = re.sub(r'<p>__COPIABLE__</p>\n\n<CopyButton(.*?)/>\n\n<pre>(.*?)</pre>', r'<pre className="relative">\n<div className="absolute top-2 right-2"><CopyButton\1/></div>\n\2</pre>', page)

    article_data['content'] = flatten_content(page, article_data['title'])
    # NOTE(review): built-in hash() of a str is salted per interpreter process
    # unless PYTHONHASHSEED is fixed, so this id differs between runs - confirm
    # that nothing relies on it being stable
    article_data['id'] = hash(path.removeprefix(ROOT_DIR))

    path_list = sanitize(beautify(path)).split('/')[1:-1] # path to parent folder
    article_data['dir'] = path_list if not is_readme else path_list[:-1] # readmes should be elevated

    article_name = target_dir.split('/')[-1]
    article_data['name'] = article_name

    page_title = article_name.title().replace('-',' ')

    copiable_article_plaintext = None
    if COURSE_INDICATOR not in path: # don't add "copy article plaintext" button to course content
        plaintext = article_data['content']
        # NOTE(review): eval is used here to interpret the escape sequences in
        # the flattened content as a Python string literal. It executes
        # arbitrary expressions if the content can break out of the quotes
        # (e.g. a backslash directly before a double quote) and raises on a raw
        # newline - only safe while article sources are trusted; consider a
        # dedicated unescape step
        plaintext = eval('"'+ plaintext.replace('"','\\"')+'"')
        plaintext = html.escape(plaintext)
        plaintext = plaintext.replace('"', '&quot;')
        # replace braces for nextjs
        plaintext = plaintext.replace('{', '&#123;').replace('}', '&#125;')
        plaintext = plaintext.replace('\\n', '\\\\n').replace('\n', '\\n') # copy button component takes newlines as literals
        copiable_article_plaintext = plaintext

    return page, article_data, page_title, copiable_article_plaintext, table_of_contents