zephyr/scripts/build/subfolder_list.py
Lingao Meng 302422ad9d everywhere: replace double words
import os
import re

# Words whose immediate repetition ("and and") filter_repeated_words() removes.
# Matching is case-sensitive, which is why the pronoun 'I' is capitalised.
common_words = {
    'about', 'after', 'all', 'also', 'an', 'and', 'any', 'are',
    'as', 'at', 'be', 'because', 'but', 'by', 'can', 'come',
    'could', 'day', 'do', 'even', 'first', 'for', 'get', 'give',
    'go', 'has', 'have', 'he', 'her', 'him', 'his', 'how',
    'I', 'in', 'into', 'it', 'its', 'just', 'know', 'like',
    'look', 'make', 'man', 'many', 'me', 'more', 'my', 'new',
    'no', 'not', 'now', 'of', 'one', 'only', 'or', 'other',
    'our', 'out', 'over', 'people', 'say', 'see', 'she', 'so',
    'some', 'take', 'tell', 'than', 'their', 'them', 'then', 'there',
    'these', 'they', 'think', 'this', 'time', 'two', 'up', 'use',
    'very', 'want', 'was', 'way', 'we', 'well', 'what', 'when',
    'which', 'who', 'will', 'with', 'would', 'year', 'you', 'your',
}

# Last dot-separated component of the file names process_directory() handles.
# A name without any dot (e.g. "Kconfig") is compared as a whole.
valid_extensions = {
    'c', 'h', 'yaml', 'cmake', 'conf', 'txt',
    'overlay', 'rst', 'dtsi', 'Kconfig', 'dts', 'defconfig',
    'yml', 'ld', 'sh', 'py', 'soc', 'cfg',
}

def filter_repeated_words(text, words=None):
    """Remove the second copy of an immediately repeated word.

    Two passes are run over text:

    1. "word word" on one line (only spaces/tabs in between) collapses to a
       single "word".
    2. "word <gap> word", where the gap is made only of whitespace, newlines,
       '*' and '/' (for instance a sentence wrapped inside a C block comment),
       loses the second "word". The gap itself is kept verbatim so line
       breaks and comment decoration stay as they were.

    Matching is case-sensitive and limited to whole words.

    text is the complete content to filter. words is the collection of words
    to check; when it is None the module-level common_words is used. The
    filtered text is returned.
    """
    if words is None:
        words = common_words
    if not words:
        # An empty alternation matches the empty string, which would make
        # the patterns below delete the whitespace between unrelated words.
        return text

    # Sorted only to make the compiled pattern reproducible; every
    # alternative is anchored by \b on both sides, so order cannot change
    # which word matches.
    alternation = '|'.join(map(re.escape, sorted(words)))

    # Pass 1: repetition inside a line. [^\S\n] is "whitespace except
    # newline", so this pass never reaches across a line break.
    within_line_pattern = re.compile(
        r'\b(' + alternation + r')\b[^\S\n]+\b\1\b')
    text = within_line_pattern.sub(r'\1', text)

    # Pass 2: repetition across line boundaries and comment decoration.
    # The pattern runs on the real text: joining the lines with a textual
    # marker would place letters between the two words, so a wrapped
    # repetition could never match, and a marker already present in the
    # input would be turned into a newline.
    across_lines_pattern = re.compile(
        r'\b(' + alternation + r')\b([*/\s]*)\b\1\b')
    return across_lines_pattern.sub(r'\1\2', text)

def process_file(file_path):
    """Run filter_repeated_words() over one UTF-8 file, in place.

    file_path is the file to read and, if needed, rewrite. The file is only
    written back when the filtered text differs from what was read, so
    unchanged files are left untouched.
    """
    # newline='' disables newline translation, so a file using CRLF line
    # endings is read and written back with those endings intact.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        text = file.read()

    new_text = filter_repeated_words(text)
    if new_text == text:
        return

    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        file.write(new_text)

def process_directory(directory_path):
    """Run process_file() on every eligible file below directory_path.

    Hidden directories (name starting with '.') are not entered and hidden
    files are skipped. A file is eligible when the last dot-separated part
    of its name is in valid_extensions.
    """
    for root, dirs, files in os.walk(directory_path):
        # Prune in place so os.walk does not descend into hidden directories
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for file in files:
            # Filter out hidden files
            if file.startswith('.'):
                continue
            # For a name without any dot this is the whole name, which is
            # how an entry such as 'Kconfig' can match a plain "Kconfig" file
            file_extension = file.split('.')[-1]
            # Only handle files with one of the listed extensions
            if file_extension in valid_extensions:
                file_path = os.path.join(root, file)
                print(f"Processed file: {file_path}")
                process_file(file_path)

# Default tree to clean up. When the script is run directly, a directory
# given as the first command-line argument takes precedence.
directory_to_process = "/home/mi/works/github/zephyrproject/zephyr"

# Only rewrite files when executed as a script, never as a side effect of
# importing this module.
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        directory_to_process = sys.argv[1]
    process_directory(directory_to_process)

Signed-off-by: Lingao Meng <menglingao@xiaomi.com>
2024-06-25 06:05:35 -04:00

120 lines
3.6 KiB
Python

#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
"""Write subfolder list to a file
This script will walk the specified directory and write the file specified with
the list of all sub-directories found. If the output file already exists, the
file will only be updated in case sub-directories have been added or removed
since the previous invocation.
"""
import os
import argparse
def parse_args(argv=None):
    """Parse command line arguments and options

    argv is the list of argument strings to parse. When it is None (the
    default) argparse reads the arguments from sys.argv.
    Returns the argparse namespace with the attributes directory,
    create_links, out_file and trigger_file.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False)

    parser.add_argument('-d', '--directory', required=True,
                        help='Directory to walk for sub-directory discovery')
    parser.add_argument('-c', '--create-links', required=False,
                        help='Create links for each directory found in '
                             'directory given')
    parser.add_argument('-o', '--out-file', required=True,
                        help='File to write containing a list of all '
                             'directories found')
    parser.add_argument('-t', '--trigger-file', required=False,
                        help='Trigger file to be touched to re-run CMake')

    return parser.parse_args(argv)
def _ensure_symlink(target, link):
    """Create link pointing at target unless a usable entry already exists"""
    if os.path.islink(link) and not os.path.exists(link):
        # A dangling link is invisible to os.path.exists() but still makes
        # os.symlink() fail, so drop it before creating the new one.
        os.remove(link)
    if not os.path.lexists(link):
        os.symlink(target, link)


def get_subfolder_list(directory, create_links=None):
    """Return subfolder list of a directory

    The list starts with directory itself, followed by every sub-directory
    found below it; at each level the sub-directories are visited in sorted
    order.

    If create_links is given, it is created when missing and populated with
    one symbolic link per directory: the link for directory is named after
    its base name, the link for a sub-directory is that base name plus the
    relative path with path separators replaced by '_'. The returned list
    then holds the link paths instead of the real paths. Links that already
    exist and resolve are left as they are.
    """
    linking = create_links is not None

    if linking:
        os.makedirs(create_links, exist_ok=True)
        # normpath drops a trailing separator, which would otherwise make
        # the base name (and with it the name of the root link) empty.
        symbase = os.path.basename(os.path.normpath(directory))
        symlink = create_links + os.path.sep + symbase
        _ensure_symlink(directory, symlink)
        dirlist = [symlink]
    else:
        dirlist = [directory]

    for root, dirs, _ in os.walk(directory, topdown=True):
        # Sorting in place also fixes the order in which os.walk descends
        dirs.sort()
        for subdir in dirs:
            targetdirectory = os.path.join(root, subdir)
            if not linking:
                dirlist.append(targetdirectory)
                continue
            reldir = os.path.relpath(targetdirectory, directory)
            linkname = symbase + '_' + reldir.replace(os.path.sep, '_')
            symlink = create_links + os.path.sep + linkname
            _ensure_symlink(targetdirectory, symlink)
            dirlist.append(symlink)

    return dirlist
def gen_out_file(out_file, dirs):
    """Generate file with the list of directories

    Each entry of dirs is written on its own line, newline terminated.
    File won't be updated if it already exists and has the same content
    """
    dirs_nl = "\n".join(dirs) + "\n"

    try:
        with open(out_file, 'r', encoding="utf-8") as out_file_fo:
            if out_file_fo.read() == dirs_nl:
                # Content is already up to date: leave the file untouched
                return
    except FileNotFoundError:
        # No previous output, fall through and create it
        pass

    with open(out_file, 'w', encoding="utf-8") as out_file_fo:
        # dirs_nl is a single string, so write() is the matching call
        out_file_fo.write(dirs_nl)
def touch(trigger):
    """Touch the trigger file

    If no trigger file is provided then do a return.
    An existing file gets its access and modification times set to the
    current time; a missing file is created empty.
    """
    if trigger is None:
        return

    try:
        os.utime(trigger, None)
    except FileNotFoundError:
        # Nothing to update yet: create the file, which stamps it as well
        with open(trigger, 'w', encoding="utf-8"):
            pass
def main():
    """Run the script: parse the options, then write the directory list"""
    options = parse_args()

    gen_out_file(
        options.out_file,
        get_subfolder_list(options.directory, options.create_links))

    # Always touch trigger file to ensure json files are updated
    touch(options.trigger_file)
# Script entry point: do nothing when the module is merely imported
if __name__ == "__main__":
    main()