kp-segtag-image/segtag.py at main · formulake/kp-segtag-image · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import gradio as gr
import os
import cv2
import mediapipe as mp

print("Starting script...")

# Export the port Gradio should serve on through its environment variable.
os.environ.update({"GRADIO_SERVER_PORT": "7850"})

# Category names a shot can be classified as; each one is also used as the
# name of a destination sub-folder.
shot_types = [
    "upper_body",
    "cowboy_shot",
    "close_up",
    "portrait",
    "full_body",
    "unknown",
]


def detect_damaged_eyes(image_path, blur_threshold):
    """Return True if any detected eye region in the image is blurrier than allowed.

    Eyes are located with OpenCV's Haar eye cascade on the grayscale image.
    For each detected eye the variance of the Laplacian is used as a
    sharpness score; a score strictly below ``blur_threshold`` marks the
    image as having damaged eyes.

    Args:
        image_path: Path of the image file to inspect.
        blur_threshold: Minimum acceptable Laplacian variance for an eye region.

    Returns:
        True when at least one detected eye scores below ``blur_threshold``;
        False when no eye is detected, all eyes are sharp enough, or the
        file cannot be decoded as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) for missing or
        # undecodable files; without this guard cvtColor would crash.
        return False

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")
    eyes = eye_cascade.detectMultiScale(gray, 1.3, 5)

    # Stop at the first eye region whose sharpness falls below the threshold.
    return any(
        cv2.Laplacian(gray[ey:ey + eh, ex:ex + ew], cv2.CV_64F).var() < blur_threshold
        for (ex, ey, ew, eh) in eyes
    )

# Shared MediaPipe face detector, created once at import time.
mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.2)


def detect_face(img):
    """Detect a face in a BGR image and report how much of the frame it fills.

    Args:
        img: Image array in OpenCV's BGR channel order.

    Returns:
        A ``(found, face_size)`` tuple. ``found`` is True when the detector
        reports at least one face; ``face_size`` is the area of the first
        reported bounding box divided by the image area, or 0 when no face
        was found.
    """
    detections = face_detection.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).detections
    if not detections:
        return False, 0

    # The bounding box is relative (0..1); scale it to pixels before
    # expressing its area as a fraction of the whole frame.
    img_h, img_w, _ = img.shape
    box_h = detections[0].location_data.relative_bounding_box.height * img_h
    box_w = detections[0].location_data.relative_bounding_box.width * img_w
    return True, box_h * box_w / (img_h * img_w)

mp_pose = mp.solutions.pose
# static_image_mode=True runs person detection on every input. The default
# (False) treats successive inputs as video frames and tracks landmarks from
# the previous image, which is wrong for a folder of unrelated pictures.
pose = mp_pose.Pose(static_image_mode=True)


def pose_estimation(img):
    """Run MediaPipe Pose on a BGR image and return its landmarks.

    Args:
        img: Image array in OpenCV's BGR channel order.

    Returns:
        A dict mapping ``"landmark_<idx>"`` to an ``(x, y, visibility)``
        tuple for every landmark reported, or None when no pose is detected.
    """
    results = pose.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if not results.pose_landmarks:
        return None
    return {
        f"landmark_{idx}": (landmark.x, landmark.y, landmark.visibility)
        for idx, landmark in enumerate(results.pose_landmarks.landmark)
    }


mp_holistic = mp.solutions.holistic
# static_image_mode=True makes every call an independent detection instead of
# tracking landmarks from the previously processed (unrelated) image.
holistic = mp_holistic.Holistic(
    static_image_mode=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)


def detect_body_with_holistic(img):
    """Return True if MediaPipe Holistic finds pose landmarks in a BGR image.

    Args:
        img: Image array in OpenCV's BGR channel order.

    Returns:
        True when the holistic result contains pose landmarks, else False.
    """
    results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return bool(results.pose_landmarks)

def classify_shot_type(img_path):
    """Classify the framing of the image at ``img_path``.

    The face is checked first: a face covering more than 50% of the frame is
    a ``"close_up"``, more than 20% a ``"portrait"``. Otherwise the decision
    is made from which pose landmarks are confidently visible.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        One of ``"close_up"``, ``"portrait"``, ``"upper_body"``,
        ``"cowboy_shot"``, ``"full_body"`` or ``"unknown"``. ``"unknown"`` is
        returned when the file cannot be read, no pose is detected, or no
        rule matches.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for unreadable files instead of raising.
        return "unknown"

    # Face-based rules take precedence over pose-based ones.
    has_face, face_size = detect_face(img)
    if has_face and face_size > 0.5:
        return "close_up"
    if has_face and face_size > 0.2:
        return "portrait"

    keypoints = pose_estimation(img)
    if not keypoints:
        return "unknown"

    visibility_threshold = 0.8  # You can adjust this threshold

    def is_visible(index):
        """Return True if landmark ``index`` exceeds the visibility threshold."""
        landmark = keypoints.get(f"landmark_{index}")
        return landmark is not None and landmark[2] > visibility_threshold

    # Indices follow the MediaPipe Pose landmark map (not COCO):
    # 0 nose, 11/12 shoulders, 23/24 hips, 25/26 knees, 27/28 ankles.
    nose = is_visible(0)
    shoulders = is_visible(11) and is_visible(12)
    hips = is_visible(23) and is_visible(24)
    knees = is_visible(25) or is_visible(26)
    left_ankle = is_visible(27)
    right_ankle = is_visible(28)

    if nose and hips and not left_ankle and not right_ankle:
        # Upper Body: head to hips/waist. It must be tested before the cowboy
        # shot, whose condition is a superset and would otherwise shadow it.
        # NOTE(review): "no knee visible" is the heuristic used here to tell
        # the two apart; confirm it matches the intended definition.
        if shoulders and not knees:
            return "upper_body"
        # Cowboy Shot: thighs to head, not including feet or ankles.
        return "cowboy_shot"

    # Full Body: all major keypoints from head to ankles are detected.
    if nose and hips and shoulders and left_ankle and right_ankle:
        return "full_body"

    return "unknown"


def segregate_images(source_folder, upper_body, cowboy, close_up, portrait, full_body, blur_threshold):
    """Sort the images directly inside ``source_folder`` into category sub-folders.

    A ``damaged_eyes`` folder and one folder per entry of ``shot_types`` are
    created inside ``source_folder`` if missing. Each image is moved to
    ``damaged_eyes`` when ``detect_damaged_eyes`` flags it, otherwise to the
    folder named after the result of ``classify_shot_type``.

    Only files located directly in ``source_folder`` are processed;
    sub-folders (including the destination folders) are not scanned, so
    images that were already sorted are never picked up again.

    Args:
        source_folder: Directory containing the images to sort.
        upper_body, cowboy, close_up, portrait, full_body: Checkbox values
            from the UI; accepted for interface compatibility but not used.
        blur_threshold: Threshold forwarded to ``detect_damaged_eyes``.

    Returns:
        A status message naming the processed folder.
    """
    # Create necessary directories within the source folder
    for folder in ["damaged_eyes"] + shot_types:
        folder_path = os.path.join(source_folder, folder)
        if not os.path.exists(folder_path):
            os.mkdir(folder_path)

    image_extensions = ('.png', '.jpg', '.jpeg')

    # os.listdir returns a complete list up front, so moving files while
    # looping cannot disturb the iteration (unlike renaming during os.walk).
    for file in sorted(os.listdir(source_folder)):
        img_path = os.path.join(source_folder, file)
        # Match extensions case-insensitively so e.g. "IMG_01.JPG" is included.
        if not os.path.isfile(img_path) or not file.lower().endswith(image_extensions):
            continue

        if detect_damaged_eyes(img_path, blur_threshold):
            destination = "damaged_eyes"
        else:
            destination = classify_shot_type(img_path)
        os.rename(img_path, os.path.join(source_folder, destination, file))

    return f"Images from {source_folder} have been processed."

# NOTE(review): this executes at import time, before interface.launch() is
# reached, so the shared `holistic` instance is closed before any request
# could call detect_body_with_holistic(). Presumably such a call would then
# fail - confirm, and consider closing the instance at shutdown instead.
holistic.close()

# Define the functions for the blocks
def _run_optional_stage(function_name, stage_input):
    """Call the module-level function ``function_name`` if it exists.

    Returns the function's result, or a short status message when no callable
    with that name is defined in this module.
    """
    stage = globals().get(function_name)
    if callable(stage):
        return stage(stage_input)
    return f"{function_name} is not available."


def combined_functions(source_folder, upper_body, cowboy, close_up, portrait, full_body, blur_threshold,
                       hairstyle_input, clothing_input, wd14_input):
    """Run every processing stage for one submission of the interface.

    The angle segregator always runs. The hairstyle, clothing and WD14 stages
    are looked up by name and only called when a matching function is
    defined; otherwise a status message is returned for that stage instead of
    raising NameError after the images have already been moved.

    Args:
        source_folder, upper_body, cowboy, close_up, portrait, full_body,
            blur_threshold: Forwarded unchanged to ``segregate_images``.
        hairstyle_input: Input for ``hairstyle_segregator``.
        clothing_input: Input for ``clothing_segregator``.
        wd14_input: Input for ``wd14_tagging``.

    Returns:
        A 4-tuple ``(angle_output, hairstyle_output, clothing_output,
        wd14_output)``, one value per output box of the interface.
    """
    angle_output = segregate_images(source_folder, upper_body, cowboy, close_up, portrait, full_body, blur_threshold)
    hairstyle_output = _run_optional_stage("hairstyle_segregator", hairstyle_input)
    clothing_output = _run_optional_stage("clothing_segregator", clothing_input)
    wd14_output = _run_optional_stage("wd14_tagging", wd14_input)
    return angle_output, hairstyle_output, clothing_output, wd14_output

# Create the interface
# Single-page UI: the components are created in the same order as the
# parameters of combined_functions (inputs) and its return tuple (outputs).
interface = gr.Interface(
    fn=combined_functions,
    inputs=[
        gr.Textbox(label="Angle Segregator: Provide the path to your source folder here. All images to be segregated need to be present in the source folder. Subfolders will not be scanned."),
        # One checkbox per shot type.
        *(
            gr.Checkbox(label=caption)
            for caption in ("Upper Body", "Cowboy", "Close-Up", "Portrait", "Full Body")
        ),
        gr.Slider(minimum=0, maximum=100, label="Blur Threshold for Eyes"),
        # Free-text inputs for the placeholder stages.
        *(
            gr.Textbox(label=caption)
            for caption in (
                "Placeholder input for Hairstyle Segregator",
                "Placeholder input for Clothing Segregator",
                "Placeholder input for WD14 Tagging",
            )
        ),
    ],
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Angle Segregator Output",
            "Hairstyle Segregator Output",
            "Clothing Segregator Output",
            "WD14 Tagging Output",
        )
    ],
    live=False,
)

# Start serving the interface on port 7850.
interface.launch(server_port=7850)

# This script will group the inputs and outputs together in a single page. You can adjust the layout and appearance as needed.