zephyr/doc/scripts/extract_content.py
Marc Herbert c1f6c7d19e doc: extract_content.py: support multiple build directories
Exclude (other) build directories with a CMakeCache.txt file as they
typically contain previous output from ourselves.

One key feature of "out of source" builds offered by CMake is allowing
multiple build directories, for instance to build different
configurations without starting from scratch. Unfortunately, the
extract_content.py code had a severe issue with multiple build
directories, with an interesting effect on build times; see the simple
reproduction below:

cd doc/
cmake -B _build/
cmake -B _b2

 # Repeat these a couple of times
make -C _build content
make -C _b2 content

find -name rst
./_b2/rst
./_b2/rst/doc/_build/rst
./_b2/rst/doc/_build/rst/doc/_b2/rst
./_b2/rst/doc/_build/rst/doc/_b2/rst/doc/_build/rst

Signed-off-by: Marc Herbert <marc.herbert@intel.com>
2020-10-21 06:27:22 -05:00


#!/usr/bin/env python3
#
# Copyright (c) 2018, Foundries.io Ltd
# Copyright (c) 2018, Nordic Semiconductor ASA
# Copyright (c) 2017, Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Internal script used by the documentation's build system to create
# the "final" docs tree which is then compiled by Sphinx.
#
# This works around the fact that Sphinx needs a single documentation
# root directory, while Zephyr's documentation files are spread around
# the tree.

import argparse
import collections
import fnmatch
import os
from os import path
import re
import shutil
import sys

# directives to parse for included files
DIRECTIVES = ["figure", "include", "image", "literalinclude"]
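# The script assumes each of these directives takes a single path argument;
# e.g. a reST line such as ".. literalinclude:: main.c" (hypothetical file
# name) names a file that must be copied alongside the .rst that uses it.
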
# A simple namedtuple for a generated output file.
#
# - src: source file, what file should be copied (in source directory)
# - dst: destination file, path it should be copied to (in build directory)
Output = collections.namedtuple('Output', 'src dst')
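#
# Example (hypothetical paths):
#   Output(src='/zephyr/doc/index.rst', dst='/build/rst/doc/index.rst')
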
# Represents the content which must be extracted from the Zephyr tree,
# as well as the output directories needed to contain it.
#
# - outputs: list of Output objects for extracted content.
# - output_dirs: set of directories which must exist to contain
# output destination files.
Content = collections.namedtuple('Content', 'outputs output_dirs')

def src_deps(zephyr_base, src_file, dest, src_root):
    # - zephyr_base: the ZEPHYR_BASE directory containing src_file
    # - src_file: path to a source file in the documentation
    # - dest: path to the top-level output/destination directory
    # - src_root: path to the Sphinx top-level source directory
    #
    # Return a list of Output objects which contain src_file's
    # additional dependencies, as they should be copied into
    # dest. Output paths inside dest are based on each
    # dependency's relative path from zephyr_base.
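    #
    # For instance (hypothetical paths), if doc/boards/foo.rst contains
    # ".. image:: images/board.png", the result includes an Output whose
    # src is <zephyr_base>/doc/boards/images/board.png and whose dst is
    # <dest>/doc/boards/images/board.png.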

    # Inspect only .rst files for directives referencing other files
    # we'll need to copy (as configured in the DIRECTIVES variable)
    if not src_file.endswith(".rst"):
        return []

    # Load the file's contents, bailing on decode errors.
    try:
        with open(src_file, encoding="utf-8") as f:
            content = f.read()
    except UnicodeDecodeError as e:
        # pylint: disable=unsubscriptable-object
        sys.stderr.write(
            "Malformed {} in {}\n"
            " Context: {}\n"
            " Problematic data: {}\n"
            " Reason: {}\n".format(
                e.encoding, src_file,
                e.object[max(e.start - 40, 0):e.end + 40],
                e.object[e.start:e.end],
                e.reason))
        return []

    # Source file's directory.
    src_dir = path.dirname(src_file)
    # Destination directory for any dependencies.
    dst_dir = path.join(dest, path.relpath(src_dir, start=zephyr_base))

    # Find directives in the content which imply additional
    # dependencies. We assume each such directive takes a single
    # argument, which is a (relative) path to the additional
    # dependency file.
    directives = "|".join(DIRECTIVES)
    pattern = re.compile(r"\.\.\s+(?P<directive>%s)::\s+(?P<dep_rel>[^\s]+)" %
                         directives)
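    # For example, a line such as ".. include:: ../common/notes.rst"
    # (hypothetical path) matches with directive="include" and
    # dep_rel="../common/notes.rst".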

    deps = []
    for m in pattern.finditer(content):
        dep_rel = m.group('dep_rel')  # relative to src_dir or absolute
        dep_src = path.abspath(path.join(src_dir, dep_rel))

        if path.isabs(dep_rel):
            # Not a relative path; we can only resolve it if we have been
            # provided with a Sphinx source directory root
            if not src_root:
                print("Absolute path to file:", dep_rel, "\n referenced by:",
                      src_file, "with no --sphinx-src-root", file=sys.stderr)
                continue

            # Make it really relative
            dep_rel = '.' + dep_rel
            dep_src = path.abspath(path.join(src_root, dep_rel))
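            # e.g. a (hypothetical) dep_rel of "/images/board.png" becomes
            # "./images/board.png" and now resolves under src_root.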
            if path.isfile(dep_src):
                # File found, but no need to copy it since it's part
                # of Sphinx's top-level source directory
                continue

        if not path.isfile(dep_src):
            print("File not found:", dep_src, "\n referenced by:",
                  src_file, file=sys.stderr)
            continue

        dep_dst = path.abspath(path.join(dst_dir, dep_rel))
        deps.append(Output(dep_src, dep_dst))

    return deps

def find_content(zephyr_base, src, dest, fnfilter, ignore, src_root):
    # Create a list of Outputs to copy over, and new directories we
    # might need to make to contain them. Don't copy any files or
    # otherwise modify dest.
    outputs = []
    output_dirs = set()

    for dirpath, dirnames, filenames in os.walk(path.join(zephyr_base, src)):
        # Limit the rest of the walk to subdirectories that aren't ignored.
        dirnames[:] = [d for d in dirnames if not
                       path.normpath(path.join(dirpath, d)).startswith(ignore)]

        # Exclude (other) build directories. They may contain previous
        # output from ourselves!
        dirnames[:] = [d for d in dirnames if not
                       path.exists(path.join(dirpath, d, 'CMakeCache.txt'))]
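        # (For instance, sibling doc/_build and doc/_b2 trees created by
        # separate "cmake -B" invocations are pruned here.)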

        # If the current directory contains no matching files, keep going.
        sources = fnmatch.filter(filenames, fnfilter)
        if not sources:
            continue

        # There are sources here; track that the output directory
        # needs to exist.
        dst_dir = path.join(dest, path.relpath(dirpath, start=zephyr_base))
        output_dirs.add(path.abspath(dst_dir))

        # Initialize an Output for each source file, as well as any of
        # that file's additional dependencies. Make sure output
        # directories for dependencies are tracked too.
        for src_rel in sources:
            src_abs = path.join(dirpath, src_rel)
            deps = src_deps(zephyr_base, src_abs, dest, src_root)
            for depdir in (path.dirname(d.dst) for d in deps):
                output_dirs.add(depdir)
            outputs.extend(deps)
            outputs.append(Output(src_abs,
                                  path.abspath(path.join(dst_dir, src_rel))))

    return Content(outputs, output_dirs)

def extract_content(content):
    # Ensure each output subdirectory exists.
    for d in content.output_dirs:
        os.makedirs(d, exist_ok=True)

    # Create each output file. Use copy2() to avoid updating
    # modification times unnecessarily, as this triggers documentation
    # rebuilds.
    for output in content.outputs:
        shutil.copy2(output.src, output.dst)

def main():
    parser = argparse.ArgumentParser(
        description='''Recursively copy documentation files from ZEPHYR_BASE to
        a destination folder, along with files referenced in those .rst files
        by a configurable list of directives: {}. The ZEPHYR_BASE environment
        variable is used to determine source directories to copy files
        from.'''.format(DIRECTIVES))

    parser.add_argument('--outputs',
                        help='If given, save input/output files to this path')
    parser.add_argument('--just-outputs', action='store_true',
                        help='''Skip extraction and just list outputs.
                        Cannot be given without --outputs.''')
    parser.add_argument('--ignore', action='append',
                        help='''Source directories to ignore when copying
                        files. This may be given multiple times.''')
    parser.add_argument('--sphinx-src-root',
                        help='''If given, absolute paths for dependencies are
                        resolved using this root, which is the Sphinx top-level
                        source directory as passed to sphinx-build.''')
    parser.add_argument('content_config', nargs='+',
                        help='''A glob:source:destination specification
                        for content to extract. The "glob" is a documentation
                        file name pattern to include, "source" is a source
                        directory to search for such files in, and
                        "destination" is the directory to copy it into.''')
    args = parser.parse_args()

    if "ZEPHYR_BASE" not in os.environ:
        sys.exit("ZEPHYR_BASE environment variable undefined.")
    zephyr_base = os.environ["ZEPHYR_BASE"]

    if not args.ignore:
        ignore = ()
    else:
        ignore = tuple(path.normpath(ign) for ign in args.ignore)

    if args.just_outputs and not args.outputs:
        sys.exit('--just-outputs cannot be given without --outputs')

    content_config = [cfg.split(':', 2) for cfg in args.content_config]

    outputs = set()
    for fnfilter, source, dest in content_config:
        content = find_content(zephyr_base, source, dest, fnfilter, ignore,
                               args.sphinx_src_root)
        if not args.just_outputs:
            extract_content(content)
        outputs |= set(content.outputs)

    if args.outputs:
        with open(args.outputs, 'w') as f:
            for o in outputs:
                print(o.src, file=f, end='\n')
                print(o.dst, file=f, end='\n')

if __name__ == "__main__":
    main()
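
# Example invocation (hypothetical paths and values; in practice the
# documentation build system supplies the real arguments):
#
#   ZEPHYR_BASE=/path/to/zephyr ./extract_content.py \
#       --ignore /path/to/zephyr/doc/_build \
#       "*.rst:doc:/path/to/build/rst" "*.rst:boards:/path/to/build/rst"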