From 8debb322ca0ab04030d53ae8bc6938d2245684db Mon Sep 17 00:00:00 2001 From: Simon Erhardt Date: Sat, 22 Mar 2025 21:12:53 +0100 Subject: [PATCH 1/4] fix: url handling for links --- linuxdir2html/linuxdir2html.py | 122 ++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 54 deletions(-) diff --git a/linuxdir2html/linuxdir2html.py b/linuxdir2html/linuxdir2html.py index bfb0870..3f49e61 100644 --- a/linuxdir2html/linuxdir2html.py +++ b/linuxdir2html/linuxdir2html.py @@ -25,23 +25,22 @@ import re # Most of the following variables are to replace placeholders in template.html -appName = "LinuxDir2HTML" -app_ver = "1.6.1" -gen_date = datetime.datetime.now().strftime("%m/%d/%Y") -gen_time = datetime.datetime.now().strftime("%H:%M") -app_link = "https://github.com/homeisfar/LinuxDir2HTML" -dir_data = "" -total_numFiles = 0 -total_numDirs = 0 -grand_total_size= 0 -file_links = "false" # This is a string b/c it's used in the html template. -link_protocol = "file://" -include_hidden = False -follow_symlink = False -dir_results = [] -childList_names = [] # names supplied from --child options -startsList_names = [] # dir's generated from --startsfrom options -# linkRoot = "/" # [LINK ROOT] is fixed as '' (see generateHTML) +appName = "LinuxDir2HTML" +app_ver = "1.6.1" +gen_date = datetime.datetime.now().strftime("%m/%d/%Y") +gen_time = datetime.datetime.now().strftime("%H:%M") +app_link = "https://github.com/homeisfar/LinuxDir2HTML" +dir_data = "" +total_numFiles = 0 +total_numDirs = 0 +grand_total_size = 0 +file_links = "false" # This is a string b/c it's used in the html template. 
+link_protocol = "" +include_hidden = False +follow_symlink = False +dir_results = [] +childList_names = [] # names supplied from --child options +startsList_names = [] # dir's generated from --startsfrom options parser = argparse.ArgumentParser(description='Generate HTML view of the file system.\n') parser.add_argument('pathToIndex', help='Path of Directory to Index') @@ -50,15 +49,17 @@ parser.add_argument('--startswith', action='append', help='[DEPRECATED] Start of name(s) of children dirs to include') parser.add_argument('--hidden', help='Include hidden files (leading with .)', action="store_true") parser.add_argument('--links', help='Create links to files in HTML output', action="store_true") +parser.add_argument('--protocol', help='Protocol used for links (used with --links)', default="http://") parser.add_argument('--symlink', help='Follow symlinks. WARN: This can cause infinite loops.', action="store_true") parser.add_argument('-v', '--verbose', help='increase output verbosity. -v or -vv for more.', action="count") parser.add_argument('--silent', help='Suppress terminal output except on error.', action="store_true") parser.add_argument('--version', help='Print version and exit', action="version", version=app_ver) + def main(): - global include_hidden, file_links, childList_names, startsList_names, follow_symlink + global include_hidden, file_links, childList_names, startsList_names, follow_symlink, link_protocol args = parser.parse_args() - + ## Initialize logging facilities log_level = logging.WARNING if args.verbose: @@ -70,13 +71,16 @@ def main(): log_level = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%H:%M:%S', level=log_level) log_name = logging.getLevelName(logging.getLogger().getEffectiveLevel()) - logging.info( f'Logging Level {log_name}') - + logging.info(f'Logging Level {log_name}') + # Handle user input flags and options pathToIndex = args.pathToIndex title = args.outputfile if args.links: file_links = "true" + 
link_protocol = args.protocol + logging.info(f"Using protocol for links: {link_protocol}") + if args.hidden: include_hidden = True if args.symlink: @@ -88,11 +92,10 @@ def main(): if os.path.isdir(title): logging.error(f"Chosen output file [{title}] is a directory. Aborting.") exit(1) - logging.info(f"Creating file links is [{file_links}]") logging.info(f"Showing hidden items is [{include_hidden}]") logging.info(f"Following symlinks is [{follow_symlink}]") - + # check that no child or startswith arg include a path separator for child_val in args.child or []: if os.sep in child_val: @@ -104,7 +107,6 @@ def main(): logging.error(f"startswith argument [{start_val}] contains a path separator.") exit(1) startsList_names.append(os.path.normcase(start_val)) - # Time to do the real work. Generate array with our file & dir entries, # then generate the resulting HTML pathToIndex = Path(pathToIndex).resolve() @@ -114,20 +116,27 @@ def main(): generateHTML( dir_data, appName, app_ver, gen_date, gen_time, title, app_link, total_numFiles, total_numDirs, grand_total_size, file_links - ) + ) return - -def generateDirArray(root_dir): # root i.e. user-provided root path, not "/" + + +def generateDirArray(root_dir): # root i.e. user-provided root path, not "/" global dir_data, total_numFiles, total_numDirs, grand_total_size, \ - dir_results, childList_names, startsList_names + dir_results, childList_names, startsList_names id = 0 dirs_dictionary = {} - + + # Convert root_dir to string for path operations + root_str = str(root_dir) + # Ensure root_str does end with a separator to properly build path + if not root_str.endswith(os.sep): + root_str += os.sep + # We enumerate every unique directory, ignoring symlinks by default. 
first_iteration = True for current_dir, dirs, files in os.walk(root_dir, True, None, follow_symlink): - logging.debug( f'Walking Dir [{current_dir}]') - + logging.debug(f'Walking Dir [{current_dir}]') + # If --child or --startswith are used, only add the requested # directories. This will only be performed on the root_dir if first_iteration: @@ -135,49 +144,52 @@ def generateDirArray(root_dir): # root i.e. user-provided root path, not "/" if childList_names or startsList_names: selectDirs(current_dir, dirs, include_hidden) files = [] - if include_hidden is False: dirs[:] = [d for d in dirs if not d[0] == '.'] files = [f for f in files if not f[0] == '.'] - dirs = sorted(dirs, key=str.casefold) files = sorted(files, key=str.casefold) + # Create a relative path by removing root_dir from current_dir + rel_dir = str(current_dir) + if rel_dir.startswith(root_str): + rel_dir = rel_dir[len(root_str):] + elif rel_dir == root_str[:-1]: # If it's the root directory itself + rel_dir = "/" + # The key is the current dir, and the value is described as follows. # A four index array like so: # | 0 | 1 | 2 | 3 | # | id | file_attrs | dir total file size | sub dirs | - # [1] is an array with the current directory path and modification time, then + # [1] is an array with the current directory path and modification time, then # is followed by the directory's files. Each file has a size and modtime. # Id is unused but could be useful for future features. 
dirs_dictionary[current_dir] = [id, [], 0, ''] arr = dirs_dictionary[current_dir][1] dir_mod_time = int( - datetime.datetime.fromtimestamp( - os.path.getmtime(current_dir)).timestamp()) - arr.append(f'{json.dumps(current_dir)[1:-1]}\0000\0{dir_mod_time}') - + datetime.datetime.fromtimestamp( + os.path.getmtime(current_dir)).timestamp()) + arr.append(f'{json.dumps(rel_dir)[1:-1]}\0000\0{dir_mod_time}') ##### Enumerate FILES ##### total_size = 0 for file in files: full_file_path = os.path.join(current_dir, file) - + if os.path.isfile(full_file_path): - total_numFiles += 1 - file_size = os.path.getsize(full_file_path) + total_numFiles += 1 + file_size = os.path.getsize(full_file_path) if (os.path.islink(full_file_path)): file_size = os.lstat(full_file_path).st_size - total_size += file_size + total_size += file_size grand_total_size += file_size try: # Avoid possible invalid mtimes mod_time = int(datetime.datetime.fromtimestamp - (os.path.getmtime(full_file_path)).timestamp()) + (os.path.getmtime(full_file_path)).timestamp()) except: logging.warning(f'----fromtimestamp timestamp invalid [{full_file_path}]') mod_time = 1 arr.append(f'{json.dumps(file)[1:-1]}\0{file_size}\0{mod_time}') dirs_dictionary[current_dir][2] = total_size - ##### Enumerate DIRS ##### dir_links = '' for dir in dirs: @@ -189,7 +201,6 @@ def generateDirArray(root_dir): # root i.e. user-provided root path, not "/" dirs_dictionary[full_dir_path] = [id, [dir], 0, ''] dir_links += f'{id}\0' dirs_dictionary[current_dir][3] = dir_links[:-1] - ## Output format follows: # "FILE_PATH\00\0MODIFIED_TIME","FILE_NAME\0FILE_SIZE\0MODIFIED_TIME",DIR_SIZE,"DIR1\0DIR2..." # To get a practical sense of what this means, look at a generated output after using the program. @@ -202,6 +213,7 @@ def generateDirArray(root_dir): # root i.e. user-provided root path, not "/" dir_results.append(dir_data) return + # This function will execute only on the first iteration of the directory walk. 
# It only has an effect if --child or --startswith are used. def selectDirs(current_dir, dirs, include_hidden): @@ -211,11 +223,11 @@ def selectDirs(current_dir, dirs, include_hidden): if startsList_names: logging.warning(f'Using dirs starting with [{str(startsList_names)[1:-1]}]') if include_hidden: - hidden_dirs = ["."+d for d in startsList_names] + hidden_dirs = ["." + d for d in startsList_names] logging.warning(f'Hidden flag set. Using dirs starting with [{str(hidden_dirs)[1:-1]}]') - + desired_dirs = startsList_names + hidden_dirs - for i in range(len(dirs) -1, -1, -1): + for i in range(len(dirs) - 1, -1, -1): keep_dir = '?' for desired in desired_dirs: if re.match(desired, dirs[i], re.I) or dirs[i] in childList_names: @@ -226,10 +238,11 @@ def selectDirs(current_dir, dirs, include_hidden): logging.info(f'Dirs selected:\n{dirs}') return + def generateHTML( - dir_data, appName, app_ver, gen_date, gen_time, title, - app_link, numFiles, numDirs, grand_total_size, file_links - ): + dir_data, appName, app_ver, gen_date, gen_time, title, + app_link, numFiles, numDirs, grand_total_size, file_links +): template_file = open((Path(__file__).parent / 'template.html'), 'r', encoding="utf-8") output_file = open(f'{title}.html', 'w', encoding="utf-8", errors='xmlcharrefreplace') for line in template_file: @@ -237,7 +250,7 @@ def generateHTML( if '[DIR DATA]' in modified_line: for line in dir_results: try: # can error if encoding mismatch; can't fix, just report - # TODO: investigate if the above is true after v1.6.1 + # TODO: investigate if the above is true after v1.6.1 output_file.write(f'{line}') except: logging.warning(f'----output_file.write error [{line}]') @@ -254,12 +267,13 @@ def generateHTML( modified_line = modified_line.replace('[LINK FILES]', file_links) modified_line = modified_line.replace('[LINK PROTOCOL]', link_protocol) modified_line = modified_line.replace('[SOURCE ROOT]', '') - modified_line = modified_line.replace('[LINK ROOT]', '') + modified_line = 
modified_line.replace('[LINK ROOT]', '/') output_file.write(modified_line) template_file.close() output_file.close() logging.warning("Wrote output to: " + os.path.realpath(output_file.name)) return + if __name__ == '__main__': - main() + main() \ No newline at end of file From faf872698a3de8cf9a2ae6a721e44cc3bffb1fdf Mon Sep 17 00:00:00 2001 From: Simon Erhardt Date: Sat, 22 Mar 2025 21:20:47 +0100 Subject: [PATCH 2/4] fix: improve path building by setting source root as / --- linuxdir2html/linuxdir2html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linuxdir2html/linuxdir2html.py b/linuxdir2html/linuxdir2html.py index 3f49e61..37ab640 100644 --- a/linuxdir2html/linuxdir2html.py +++ b/linuxdir2html/linuxdir2html.py @@ -266,8 +266,8 @@ def generateHTML( modified_line = modified_line.replace('[TOT SIZE]', str(grand_total_size)) modified_line = modified_line.replace('[LINK FILES]', file_links) modified_line = modified_line.replace('[LINK PROTOCOL]', link_protocol) - modified_line = modified_line.replace('[SOURCE ROOT]', '') - modified_line = modified_line.replace('[LINK ROOT]', '/') + modified_line = modified_line.replace('[SOURCE ROOT]', '/') + modified_line = modified_line.replace('[LINK ROOT]', '') output_file.write(modified_line) template_file.close() output_file.close() From 11e3fec878792924c1cc1d95cfd07f03d4d2d629 Mon Sep 17 00:00:00 2001 From: Simon Erhardt Date: Sat, 22 Mar 2025 21:41:31 +0100 Subject: [PATCH 3/4] fix: use correct pattern for app_ver in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8a8f4e5..60c80ce 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ long_description = fh.read() version = re.search( - r'(app_ver = "(\d.\d.\d)")', + r'(app_ver = "(\d.\d.\d)")', open("linuxdir2html/linuxdir2html.py").read(), re.M ).group(2) From 802d418f29d5a6c963c4e47a30d8a3fea216402e Mon Sep 17 00:00:00 2001 From: Simon Erhardt Date: Sat, 22 Mar 2025 22:30:49 +0100 
Subject: [PATCH 4/4] fix: use correct dir pattern --- linuxdir2html/linuxdir2html.py | 5 ++++- linuxdir2html/template.html | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/linuxdir2html/linuxdir2html.py b/linuxdir2html/linuxdir2html.py index 37ab640..b37e320 100644 --- a/linuxdir2html/linuxdir2html.py +++ b/linuxdir2html/linuxdir2html.py @@ -157,6 +157,9 @@ def generateDirArray(root_dir): # root i.e. user-provided root path, not "/" elif rel_dir == root_str[:-1]: # If it's the root directory itself rel_dir = "/" + if not rel_dir.startswith("/"): + rel_dir = "/" + rel_dir + # The key is the current dir, and the value is described as follows. # A four index array like so: # | 0 | 1 | 2 | 3 | @@ -266,7 +269,7 @@ def generateHTML( modified_line = modified_line.replace('[TOT SIZE]', str(grand_total_size)) modified_line = modified_line.replace('[LINK FILES]', file_links) modified_line = modified_line.replace('[LINK PROTOCOL]', link_protocol) - modified_line = modified_line.replace('[SOURCE ROOT]', '/') + modified_line = modified_line.replace('[SOURCE ROOT]', '') modified_line = modified_line.replace('[LINK ROOT]', '') output_file.write(modified_line) template_file.close() diff --git a/linuxdir2html/template.html b/linuxdir2html/template.html index 20b7a3e..95a10d7 100644 --- a/linuxdir2html/template.html +++ b/linuxdir2html/template.html @@ -1309,7 +1309,12 @@ let file_tmp = sTmp[0]; if( linkFiles ) { - const url = encodeURI(`${linkProtocol}${dir}/${filename}`); + let url; + if( dir === "/" ) { + url = encodeURI(`${linkProtocol}/${filename}`); + } else { + url = encodeURI(`${linkProtocol}${dir}/${filename}`); + } file_tmp = `${filename}`; }