A class to sort files from a source directory to a destination directory
| Attributes: |
-
source_dir
(str)
–
The source directory
-
destination_dir
(str)
–
The destination directory
|
Methods:
Examples:
file_sorter = FileSorter("source_dir", "destination_dir")
file_sorter.copy_files()
Source code in pkrfilesorter/file_sorter.py
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
class FileSorter:
    """
    A class to sort files from a source directory to a destination directory

    Attributes:
        source_dir (str): The source directory
        destination_dir (str): The destination directory

    Methods:
        get_source_files: Get all txt files in the source directory and its subdirectories
        correct_source_files: Rename source files whose name starts with "summary" by dropping that prefix
        get_date: Get the date of the file
        get_destination_path: Get the destination path of the file
        get_source_path: Join the source directory and the filename
        check_file_exists: Check if the file already exists in the destination directory
        copy_files: Copy all files from the source directory to the destination directory

    Examples:
        file_sorter = FileSorter("source_dir", "destination_dir")
        file_sorter.copy_files()
    """

    def __init__(self, source_dir: str, destination_dir: str):
        self.source_dir = source_dir
        self.destination_dir = destination_dir

    def get_source_files(self) -> list[dict]:
        """
        Get all txt files in the source directory and its subdirectories

        Returns:
            files_dict (list[dict]): A list of dictionaries containing the root directory and filename of the files
        """
        files_dict = [
            {"root": root, "filename": file}
            for root, _, files in os.walk(self.source_dir)
            for file in files
            if file.endswith(".txt")
        ]
        return files_dict

    def correct_source_files(self) -> list[dict]:
        """
        Rename source files whose name starts with "summary" by dropping that prefix

        Returns:
            renamed_files (list[dict]): The root directory and new filename of every file that was renamed
        """
        prefix = "summary"
        renamed_files = []
        for file in self.get_source_files():
            root = file["root"]
            filename = file["filename"]
            if not filename.startswith(prefix):
                continue
            new_filename = filename[len(prefix):]
            base_path = os.path.join(root, filename)
            new_path = os.path.join(root, new_filename)
            os.rename(base_path, new_path)
            print(f"File {base_path} renamed to {new_filename}")
            renamed_files.append({"root": root, "filename": new_filename})
        return renamed_files

    @staticmethod
    def get_date(filename: str) -> str:
        """
        Get the date of the file

        Args:
            filename (str): The filename of the file; the text before the first "_" is used as the date string

        Returns:
            date_path (str): The date string split as "<first 4 chars>/<next 2 chars>/<remainder>"
        """
        date_str = filename.split("_")[0]
        date_path = f"{date_str[:4]}/{date_str[4:6]}/{date_str[6:]}"
        return date_path

    def get_destination_path(self, filename: str) -> str:
        """
        Get the destination path of the file

        Args:
            filename (str): The filename of the file

        Returns:
            destination_path (str): destination_dir joined with "summaries" (when the filename contains
                "summary") or "histories/raw", the date path and the filename
        """
        date_path = self.get_date(filename)
        file_type = "summaries" if "summary" in filename else "histories/raw"
        destination_path = os.path.join(self.destination_dir, file_type, date_path, filename)
        return destination_path

    def get_source_path(self, filename: str) -> str:
        """
        Join the source directory and the filename

        Args:
            filename (str): The filename of the file

        Returns:
            source_path (str): source_dir joined with the filename (no subdirectory is taken into account)
        """
        source_path = os.path.join(self.source_dir, filename)
        return source_path

    def check_file_exists(self, filename: str) -> bool:
        """
        Check if the file already exists in the destination directory

        Args:
            filename (str): The filename to check

        Returns:
            (bool): True if the file already exists, False otherwise
        """
        return os.path.exists(self.get_destination_path(filename))

    def copy_files(self):
        """
        Copy all files from the source directory to the destination directory

        Files whose name contains "positioning_file" and files that already exist at their
        destination path are skipped.
        """
        for file in self.get_source_files():
            filename = file["filename"]
            if "positioning_file" in filename or self.check_file_exists(filename):
                continue
            source_path = os.path.join(file["root"], filename)
            destination_path = self.get_destination_path(filename)
            os.makedirs(os.path.dirname(destination_path), exist_ok=True)
            # Read before opening the destination so that a source that cannot be
            # decoded does not leave an empty destination file behind.
            with open(source_path, "r", encoding="utf-8") as source_file:
                content = source_file.read()
            with open(destination_path, "w", encoding="utf-8") as destination_file:
                destination_file.write(content)
            print(f"File {filename} copied to {destination_path}")
|
check_file_exists(filename)
Check if the file already exists in the destination directory
| Returns: |
-
bool
–
True if the file already exists, False otherwise
|
Source code in pkrfilesorter/file_sorter.py
95
96
97
98
99
100
101
102
103
104
def check_file_exists(self, filename: str) -> bool:
    """
    Tell whether the file is already present at its destination path

    Args:
        filename (str): Name of the file to look up
    Returns:
        (bool): True when the path given by get_destination_path exists, False otherwise
    """
    target_path = self.get_destination_path(filename)
    is_present = os.path.exists(target_path)
    return is_present
|
copy_files()
Copy all files from the source directory to the destination directory
Source code in pkrfilesorter/file_sorter.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def copy_files(self):
    """
    Copy every source file to its destination path

    The destination directory is created for any file whose name does not contain
    "positioning_file". A file is written only when it does not already exist at
    its destination and its name does not contain "positioning_file".
    """
    for entry in self.get_source_files():
        folder = entry.get("root")
        name = entry.get("filename")
        origin = os.path.join(folder, name)
        target = self.get_destination_path(name)
        is_positioning = "positioning_file" in name
        if not is_positioning:
            os.makedirs(os.path.dirname(target), exist_ok=True)
        if self.check_file_exists(name) or is_positioning:
            continue
        with open(origin, "r", encoding="utf-8") as reader, \
                open(target, "w", encoding="utf-8") as writer:
            writer.write(reader.read())
        print(f"File {name} copied to {target}")
|
get_date(filename)
staticmethod
Get the date of the file
| Returns: |
-
date_path (str) –
The date path of the file
|
Source code in pkrfilesorter/file_sorter.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65 | @staticmethod
def get_date(filename: str) -> str:
"""
Get the date of the file
Args:
filename (str): The filename of the file
Returns:
date_path (str): The date path of the file
"""
date_str = filename.split("_")[0]
date_path = f"{date_str[:4]}/{date_str[4:6]}/{date_str[6:]}"
return date_path
|
get_destination_path(filename)
Get the destination path of the file
| Returns: |
-
destination_path (str) –
The destination path of the file
|
Source code in pkrfilesorter/file_sorter.py
67
68
69
70
71
72
73
74
75
76
77
78
79
def get_destination_path(self, filename: str) -> str:
    """
    Work out where a file belongs inside the destination directory

    Args:
        filename (str): The filename of the file
    Returns:
        destination_path (str): destination_dir joined with "summaries" (when the filename
            contains "summary") or "histories/raw", then the date path, then the filename
    """
    if "summary" in filename:
        category = "summaries"
    else:
        category = "histories/raw"
    dated_dir = self.get_date(filename)
    return os.path.join(self.destination_dir, category, dated_dir, filename)
|
get_source_files()
Get all txt files in the source directory and its subdirectories
| Returns: |
-
files_dict (list[dict]) –
A list of dictionaries containing the root directory and filename of the files
|
Source code in pkrfilesorter/file_sorter.py
30
31
32
33
34
35
36
37
38
def get_source_files(self) -> list[dict]:
    """
    Walk the source directory and collect every file whose name ends with ".txt"

    Returns:
        files_dict (list[dict]): One dictionary per file, holding its "root" directory and its "filename"
    """
    collected = []
    for folder, _subdirs, names in os.walk(self.source_dir):
        for name in names:
            if name.endswith(".txt"):
                collected.append({"root": folder, "filename": name})
    return collected
|
get_source_path(filename)
Get the absolute source directory path of the file
| Returns: |
-
source_path (str) –
The source path of the file
|
Source code in pkrfilesorter/file_sorter.py
82
83
84
85
86
87
88
89
90
91
92
def get_source_path(self, filename: str) -> str:
    """
    Join the source directory and the filename into one path

    Args:
        filename (str): The filename of the file
    Returns:
        source_path (str): source_dir joined with the filename
    """
    return os.path.join(self.source_dir, filename)
|