import cv2 as cv
import re
import PIL
from PIL import Image, ImageTk, ImageShow
import pytesseract
import numpy as np
from pytesseract import Output
import tkinter as tk
from tkinter import *
from tkinter import filedialog
from tkinter import messagebox
#from tkinter.tix import *  # for balloon
import webbrowser

#print("Your opencv version is: " + cv2.__version__)
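# On systems where the Tesseract binary is not on PATH (common on Windows), pytesseract must
# be pointed at the executable explicitly. The path below is only an example of a typical
# install location, not something this project requires:
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'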
rectwidth = 20
rectheight = 20


class CreateToolTip(object):
    # makes the balloon messages that pop up when hovering over a widget
    def __init__(self, widget, text='widget info'):
        self.widget = widget
        self.text = text
        self.tw = None  # tooltip window handle; created on enter, destroyed on close
        self.widget.bind("<Enter>", self.enter)
        self.widget.bind("<Leave>", self.close)

    def enter(self, event=None):
        x = y = 0
        x, y, cx, cy = self.widget.bbox("insert")
        x += self.widget.winfo_rootx() + 25
        y += self.widget.winfo_rooty() + 20
        # creates a toplevel window
        self.tw = tk.Toplevel(self.widget)
        # leaves only the label and removes the app window decorations
        self.tw.wm_overrideredirect(True)
        self.tw.wm_geometry("+%d+%d" % (x, y))
        label = tk.Label(self.tw, text=self.text, justify='left',
                         background='yellow', relief='solid', borderwidth=1,
                         font=("times", "8", "normal"))
        label.pack(ipadx=1)

    def close(self, event=None):
        if self.tw:
            self.tw.destroy()


############################################################################################
# Description: Takes the original image, grayscales and thresholds it, and returns the
#              cleaned image.
# Input: image
# Output: image
############################################################################################
def sanitization(img):
    # displays original
    #cv2.imshow('Original', frame)
    # turns image to greyscale
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # displays greyscaled image
    #cv.imshow('Greyscale', gray)
    # blurs the image
    #blur = cv.GaussianBlur(gray, (5, 5), 0)
    #denoise = cv.fastNlMeansDenoising(gray)  # improves accuracy slightly
    # displays the blurred and greyscaled images
    #cv.imshow('Greyscale+Blur', blur)
    # thresholds the image (Otsu's method picks the threshold automatically)
    thresholded = cv.threshold(gray, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)[1]
    #thresholded = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 11, 2)
    # displays the blurred, greyscaled and thresholded image
    #cv.imshow('Greyscale+Threshold', thresholded)
    return thresholded


# def boxingcontours(orig, contours):  # opencv testing code for contours
#     for cnt in contours:
#         x, y, w, h = cv.boundingRect(cnt)
#         if w > rectwidth and h > rectheight and w accuracythresh:
#             #(x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
#             (text, x1, y2, x2, y1) = (d['char'][i], d['left'][i], d['top'][i], d['right'][i], d['bottom'][i])
#             cv.rectangle(orig, (x1, height - y1), (x2, height - y2), (0, 255, 0), 2)
#             # wordlength = len(d['text'][i])
#             # print(wordlength)
#             # letterwidth = w/wordlength
#             # print(letterwidth)
#             # for j in range(wordlength):
#             #     startx = int(x + (letterwidth * j))
#             #     width = int(startx + letterwidth)
#             #     orig = cv.rectangle(orig, (startx, y), (width, y + h), (0, 255, 0), 2)
#             #orig = cv.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 0), 2)
#             print(text)
#             #cv.waitKey(0)
#     # cv.imshow('Boxed', orig)


############################################################################################
# Description: Reads the text in the image, draws a box around each detected word, and writes
#              the text that was found.
# Input: sanitized image, original image, confidence threshold
# Output: original image (annotated in place)
############################################################################################
def worddetect(frame, orig, confidence):
    global result
    custom_config = r'--oem 3 -l eng --psm 6 '  # tesseract configuration
    d = pytesseract.image_to_data(frame, lang='eng', output_type=Output.DICT, config=custom_config)  # runs OCR on the image
    n_boxes = len(d['text'])
    result.delete('1.0', END)  # clears the textbox
    for i in range(n_boxes):
        if int(d['conf'][i]) > confidence:  # only keeps words above the confidence threshold
            (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])  # coordinates of the text
            # cv.rectangle(orig, (x, y), (x + w, y + h), (255, 255, 255), -1)  # covers text and overlays what it reads
            # cv.putText(orig, d['text'][i], (x, y + 12), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
            cv.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 0), 2)  # boxes the text
            cv.putText(orig, d['text'][i], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)  # writes the text above the top left of the box
            result.insert(END, d['text'][i])  # writes the word into the textbox
            result.insert(END, ' ')  # inserts a space between words
            #print(d['text'][i])
            #print(d['conf'][i])
            #cv.waitKey(0)
    #cv.imshow('Boxed', orig)
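# The 'conf' values returned by image_to_data are -1 for non-text blocks and, depending on the
# pytesseract version, may come back as strings or floats, so int(d['conf'][i]) can raise
# ValueError. An optional helper like the one below (a sketch, not wired into worddetect above)
# would make the threshold comparison more robust:
def parse_conf(raw):
    """Convert a Tesseract confidence value to an int, treating unparsable values as -1."""
    try:
        return int(float(raw))
    except (TypeError, ValueError):
        return -1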
# # Main Picture: does OCR on a single image, no interface
# frame = cv.imread('venv/Pictures/sign.PNG', 1)
# #print(frame)
# sanitized = sanitization(frame)
# # contours, _ = cv.findContours(sanitized, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)
# # cropped = boxingcontours(frame, contours)
# # sanitizedagain = sanitization(cropped)
# # worddetect(sanitizedagain, cropped)
# worddetect(sanitized, frame)
# # When everything is done, release the capture
# cv.waitKey(0)
# cv.destroyAllWindows()


# def worddetect2(frame, orig):
#     global result
#     custom_config = r'--oem 3 -l eng --psm 6 '
#     d = pytesseract.image_to_data(frame, lang='eng', output_type=Output.DICT, config=custom_config)
#     n_boxes = len(d['text'])
#     #result.delete('1.0', END)
#     for i in range(n_boxes):
#         if int(d['conf'][i]) > 80:
#             (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
#             # cv.rectangle(orig, (x, y), (x + w, y + h), (255, 255, 255), -1)  # covers text and overlays what it reads
#             # cv.putText(orig, d['text'][i], (x, y + 12), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
#             cv.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 0), 2)  # boxes text and writes what it reads on top
#             cv.putText(orig, d['text'][i], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
#             #result.insert(END, d['text'][i])
#             #result.insert(END, ' ')
#             #print(d['text'][i])
#             #print(d['conf'][i])
#             #cv.waitKey(0)
#     # cv.imshow('Boxed', orig)


# # Main Camera: camera feed without an interface
# cap = cv.VideoCapture(0)
# while True:  # capture frame-by-frame
#     ret, frame = cap.read()
#     sanitized = sanitization(frame)
#     worddetect2(sanitized, frame)
#     if cv.waitKey(1) & 0xFF == ord('q'):
#         break
# # When everything is done, release the capture
# cap.release()
# cv.destroyAllWindows()


# # GUI interface: displays the camera feed
# width, height = 1080, 720
# cap = cv.VideoCapture(0)
# cap.set(cv.CAP_PROP_FRAME_WIDTH, width)
# cap.set(cv.CAP_PROP_FRAME_HEIGHT, height)
#
# root = Tk()
# root.bind('<Escape>', lambda e: root.quit())
# lmain = Label(root)
# lmain.pack()
#
# def show_frame():
#     ret, frame = cap.read()
#     if ret:
#         sanitized = sanitization(frame)
#         worddetect(sanitized, frame)
#         cvimage = cv.cvtColor(frame, cv.COLOR_BGR2RGBA)
#         img = PIL.Image.fromarray(cvimage)
#         imgtk = ImageTk.PhotoImage(image=img)
#         lmain.imgtk = imgtk
#         lmain.configure(image=imgtk)
#         lmain.after(10, show_frame)
#
# show_frame()
# root.mainloop()


############################################################################################
# Description: Opens an image, resizes it, converts it to an array, and displays the sanitized
#              and detected-word images on screen when the Open Image button is pressed.
# Input: nothing
# Output: nothing
############################################################################################
def open_img():
    global panelA, panelB, confslider, x, result, invert, freash  # global variables
    x = filedialog.askopenfilename(title='Choose file')  # opens an explorer window to search for images
    try:
        freash = PIL.Image.open(x)  # opens the image and assigns it to the variable
    except IOError:
        messagebox.showerror("ERROR", "INVALID DATA TYPE MUST BE JPG OR PNG")  # error message if the file type is wrong
        #print("invalid image type")
        return  # nothing to display if the image could not be opened
    hsize = freash.size[1]  # gets the height of the image
    wsize = freash.size[0]  # gets the width of the image
    freash = freash.resize((int((350 / hsize) * wsize), 350), PIL.Image.ANTIALIAS)  # uniformly scales the image
    orig = freash
    img = np.asarray(orig)  # converts the image to an array
    #orig = ImageTk.PhotoImage(orig)
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # swaps the channel order (PIL gives RGB, OpenCV expects BGR)
    sanitized = sanitization(img)  # runs sanitization on the image
    if invert['text'] == "Revert":  # inverts black and white if invert is selected
        sanitized = cv.bitwise_not(sanitized)
    worddetect(sanitized, img, confslider.get())  # runs OCR on the image
    sanitized = cv.cvtColor(sanitized, cv.COLOR_GRAY2BGR)  # expands the single-channel image back to 3 channels for display
    orig = PIL.Image.fromarray(sanitized)  # converts the image back
    orig = ImageTk.PhotoImage(orig)  # changes format so that it works with tkinter
    if panelA is None:  # creates the image in panelA if not created yet
        panelA = Label(root, image=orig)
        panelA.image = orig
        panelA.pack(side="left", padx=10, pady=10)
    else:
        panelA.configure(image=orig)  # changes the image to the new original
        panelA.image = orig
    img = cv.cvtColor(img, cv.COLOR_RGB2BGR)  # swaps the channel order back
    frame = PIL.Image.fromarray(img)  # converts the image back
    frame = ImageTk.PhotoImage(frame)  # changes format so that it works with tkinter
    if panelB is None:  # creates the image in panelB if not created yet
        panelB = Label(image=frame)
        panelB.image = frame
        panelB.pack(side="right", padx=10, pady=10)
    else:
        panelB.configure(image=frame)  # changes the image to the new modified image
        panelB.image = frame
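# Compatibility note: PIL.Image.ANTIALIAS (used above) was removed in Pillow 10. If this script
# is run against a newer Pillow release, an equivalent resize call would be, for example:
# freash = freash.resize((int((350 / hsize) * wsize), 350), PIL.Image.LANCZOS)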
############################################################################################
# Description: Similar to open_img, but called whenever a variable of the OCR or sanitization
#              process changes; it then updates the graphical interface.
# Input: threshold value
# Output: nothing
############################################################################################
def update(num):
    global panelA, panelB, confslider, x, result, invert, freash
    orig = freash
    # hsize = orig.size[1]
    # wsize = orig.size[0]
    # orig = orig.resize((int((350 / hsize) * wsize), 350), PIL.Image.ANTIALIAS)
    img = np.asarray(orig)  # converts the image to an array
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # swaps the channel order
    sanitized = sanitization(img)  # sanitizes the image
    if invert['text'] == "Revert":  # keeps the image inverted if selected
        sanitized = cv.bitwise_not(sanitized)
    worddetect(sanitized, img, confslider.get())  # runs OCR
    sanitized = cv.cvtColor(sanitized, cv.COLOR_GRAY2BGR)  # back to 3 channels for display
    orig = PIL.Image.fromarray(sanitized)  # converts the image back
    orig = ImageTk.PhotoImage(orig)  # modifies the image for tkinter
    if panelA is None:  # updates panelA
        panelA = Label(root, image=orig)
        panelA.image = orig
        panelA.pack(side="left", padx=10, pady=10)
        #tip.bind_widget(panelA, balloonmsg="Displays the sanitized image sent to the OCR algorithm.")
        panelA_ttp = CreateToolTip(panelA, "Displays the sanitized image sent to the OCR algorithm.")
    else:
        panelA.configure(image=orig)
        panelA.image = orig
    img = cv.cvtColor(img, cv.COLOR_RGB2BGR)  # swaps the channel order back
    frame = PIL.Image.fromarray(img)  # converts the image back
    frame = ImageTk.PhotoImage(frame)  # modifies the image for tkinter
    if panelB is None:  # updates panelB
        panelB = Label(image=frame)
        panelB.image = frame
        panelB.pack(side="right", padx=10, pady=10)
        #tip.bind_widget(panelB, balloonmsg="Displays the output of what is found using the OCR algorithm with the results boxed on the original image.")
        panelB_ttp = CreateToolTip(panelB, "Displays the output of what is found using the OCR algorithm with the results boxed on the original image.")
    else:
        panelB.configure(image=frame)
        panelB.image = frame
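# Re-running Tesseract on every tick of the confidence slider can make the GUI feel sluggish
# while the handle is being dragged. One optional way to soften that (an addition, not part of
# the original design) is to debounce the Scale callback with Tk's after/after_cancel:
# _pending_update = None
# def debounced_update(num, delay_ms=250):
#     global _pending_update
#     if _pending_update is not None:
#         root.after_cancel(_pending_update)
#     _pending_update = root.after(delay_ms, lambda: update(num))
# # ...and pass command=debounced_update when creating confslider below.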
############################################################################################
# Description: Handles the Invert/Revert button logic in the GUI and updates the images.
# Input: nothing
# Output: nothing
############################################################################################
def inverter():
    global panelA, panelB, confslider, x, result, invert, root, inversion, freash
    orig = freash
    # hsize = orig.size[1]
    # wsize = orig.size[0]
    # orig = orig.resize((int((350 / hsize) * wsize), 350), PIL.Image.ANTIALIAS)
    img = np.asarray(orig)  # converts to an array
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # swaps the channel order
    sanitized = sanitization(img)  # sanitizes the image
    if invert['text'] == "Invert":  # if the button reads "Invert", invert the image and relabel the button
        sanitized = cv.bitwise_not(sanitized)
        invert['text'] = "Revert"
    else:  # if it reads "Revert", switch back to "Invert"
        invert['text'] = "Invert"
    worddetect(sanitized, img, confslider.get())  # runs OCR
    sanitized = cv.cvtColor(sanitized, cv.COLOR_GRAY2BGR)  # back to 3 channels for display
    orig = PIL.Image.fromarray(sanitized)  # converts the image back
    orig = ImageTk.PhotoImage(orig)  # converts for tkinter
    if panelA is None:  # updates panelA
        panelA = Label(root, image=orig)
        panelA.image = orig
        panelA.pack(side="left", padx=10, pady=10)
    else:
        panelA.configure(image=orig)
        panelA.image = orig
    img = cv.cvtColor(img, cv.COLOR_RGB2BGR)  # swaps the channel order back
    frame = PIL.Image.fromarray(img)  # converts the image back
    frame = ImageTk.PhotoImage(frame)  # converts for tkinter
    if panelB is None:  # updates panelB
        panelB = Label(image=frame)
        panelB.image = frame
        panelB.pack(side="right", padx=10, pady=10)
    else:
        panelB.configure(image=frame)
        panelB.image = frame


############################################################################################
# Description: Grabs a new frame from the camera every 200 ms and runs the worddetect function
#              on each frame that comes in.
# Input: nothing
# Output: nothing
############################################################################################
def videofeed():
    global panelA, panelB, confslider, x, result, invert, root, inversion, freash, cameratoggle, btn, cap, vidcon
    if vidcon:  # this bool controls whether the video feed keeps running
        ret, frame = cap.read()  # reads an image from the camera
        img = np.asarray(frame)  # converts to an array
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # to RGB
        freash = img
        if ret:  # if there is an image from the camera
            sanitized = sanitization(frame)  # sanitizes the image
            if invert['text'] == "Revert":  # keeps the image inverted if selected
                sanitized = cv.bitwise_not(sanitized)
            cvimage = cv.cvtColor(sanitized, cv.COLOR_GRAY2RGBA)  # back to a displayable format
            img = PIL.Image.fromarray(cvimage)  # converts the image back
            imga = ImageTk.PhotoImage(image=img)  # sanitized image for panelA
            panelA.configure(image=imga)
            panelA.image = imga
            worddetect(sanitized, frame, confslider.get())  # runs OCR
            cvimage2 = cv.cvtColor(frame, cv.COLOR_BGR2RGBA)  # to RGBA
            img2 = PIL.Image.fromarray(cvimage2)
            imgb = ImageTk.PhotoImage(image=img2)  # OCR-annotated image for panelB
            panelB.configure(image=imgb)
            panelB.image = imgb
            panelB.after(200, videofeed)  # event timer: grab the next frame in 200 ms
            #videofeed()
            # panelB.after(10, videofeed)
            # worddetect(sanitized, frame)
            # cvimage = cv.cvtColor(frame, cv.COLOR_BGR2RGBA)
            # img = PIL.Image.fromarray(cvimage)
            # imgtk = ImageTk.PhotoImage(image=img)
            # lmain.imgtk = imgtk
            # lmain.configure(image=imgtk)
            # lmain.after(10, show_frame)
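# The webcam handle ("cap", created in the main section below) is never explicitly released
# when the window is closed. A hedged clean-up sketch (the handler itself is an addition, not
# part of the original code) could be registered once root and cap exist:
# def on_close():
#     global vidcon
#     vidcon = False      # stop scheduling new video frames
#     cap.release()       # free the camera
#     root.destroy()
# root.protocol("WM_DELETE_WINDOW", on_close)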
############################################################################################
# Description: Handles the logic for toggling between live video and pictures.
# Input: nothing
# Output: nothing
############################################################################################
def switchinput():
    global panelA, panelB, confslider, x, result, invert, root, inversion, freash, cameratoggle, btn, vidcon, camerafreeze
    if cameratoggle['text'] == "Live Video":
        btn["state"] = DISABLED
        camerafreeze["state"] = NORMAL
        cameratoggle['text'] = "Picture"
        vidcon = True
        videofeed()
    else:
        btn["state"] = NORMAL
        camerafreeze["state"] = DISABLED
        cameratoggle['text'] = "Live Video"
        camerafreeze['text'] = "Freeze"
        vidcon = False
        open_img()


############################################################################################
# Description: Toggles freezing and unfreezing of the live video feed.
# Input: nothing
# Output: nothing
############################################################################################
def freezetoggle():
    global panelA, panelB, confslider, x, result, invert, root, inversion, freash, cameratoggle, btn, vidcon, camerafreeze
    if camerafreeze['text'] == "Freeze":
        camerafreeze['text'] = "Unfreeze"
        vidcon = False
    else:
        camerafreeze['text'] = "Freeze"
        vidcon = True
        videofeed()


############################################################################################
# Description: Runs a Google search on whatever text is currently in the textbox.
# Input: nothing
# Output: nothing
############################################################################################
def websearch():
    global panelA, panelB, confslider, x, result, invert, root, inversion, freash, cameratoggle, btn, vidcon, camerafreeze, search
    url = "https://www.google.com.tr/search?q={}".format(result.get(1.0, "end"))
    webbrowser.register('chrome', None, webbrowser.BackgroundBrowser("C://Program Files (x86)//Google//Chrome//Application//chrome.exe"))
    webbrowser.get('chrome').open(url)
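# Note on websearch above: the OCR text is pasted into the query string without URL-encoding,
# so punctuation or newlines from the textbox can produce a malformed address. A hedged
# alternative using only the standard library would be:
# from urllib.parse import quote_plus
# url = "https://www.google.com/search?q={}".format(quote_plus(result.get("1.0", "end").strip()))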
root = Tk()  # creates the tkinter window called root

width, height = 525, 350  # resolution of the video feed
cap = cv.VideoCapture(0)  # handle for the camera
cap.set(cv.CAP_PROP_FRAME_WIDTH, width)  # applies the width
cap.set(cv.CAP_PROP_FRAME_HEIGHT, height)  # applies the height
vidcon = True  # bool: keep the video feed running

x = 'venv/Pictures/sign.PNG'  # default image on start-up
freash = PIL.Image.open(x)  # opens the image
hsize = freash.size[1]  # gets the height
wsize = freash.size[0]  # gets the width
freash = freash.resize((int((350 / hsize) * wsize), 350), PIL.Image.ANTIALIAS)  # uniform scale
inversion = False
panelA = None
panelB = None

root.title("Image Text Recognition")  # sets the title of the window
# w, h = root.winfo_screenwidth(), root.winfo_screenheight()
# root.geometry("%dx%d+0+0" % (w, h))
#root.attributes('-zoomed', True)
root.state("zoomed")  # opens the application maximized
#root.geometry("1000x600")
#root.resizable(width=True, height=True)
#tip = Balloon(root)  # applies balloon pop-up messages on root

search = Button(root, text="Search", command=websearch)  # initializes the search button
search.pack(side="bottom", padx="10", pady="50")
#tip.bind_widget(search, balloonmsg="Does a google search on the results.")  # creates pop-up text
search_ttp = CreateToolTip(search, "Does a google search on the results.")

confslider = Scale(root, from_=0, to=100, tickinterval=5, length=600, resolution=5, label='Confidence (%)', orient=HORIZONTAL, command=update)  # initializes the confidence slider
confslider.pack(side="bottom", fill="both", expand="yes", padx="10", pady="0")
#tip.bind_widget(confslider, balloonmsg="Adjusts the confidence level threshold to improve or decrease the accuracy of the OCR algorithm.")  # creates pop-up text
confslider_ttp = CreateToolTip(confslider, "Adjusts the confidence level threshold to improve or decrease the accuracy of the OCR algorithm.")

camerafreeze = Button(root, text="Freeze", command=freezetoggle, state=DISABLED)  # initializes the freeze button
camerafreeze.pack(side="bottom", padx="10", pady="0")
#tip.bind_widget(camerafreeze, balloonmsg="Allows for pictures to be taken when in live video feed mode.")  # creates pop-up text
camerafreeze_ttp = CreateToolTip(camerafreeze, "Allows for pictures to be taken when in live video feed mode.")

cameratoggle = Button(root, text="Live Video", command=switchinput)  # initializes the toggle between live video and pictures
cameratoggle.pack(side="bottom", padx="10", pady="15")
#tip.bind_widget(cameratoggle, balloonmsg="Toggles between live video feed and opening pictures already saved on your device.")  # creates pop-up text
cameratoggle_ttp = CreateToolTip(cameratoggle, "Toggles between live video feed and opening pictures already saved on your device.")

invert = Button(root, text="Invert", command=inverter)  # initializes the invert button
invert.pack(side="bottom", padx="10", pady="20")
#tip.bind_widget(invert, balloonmsg="Changes all black to white and white to black in the image sent to the OCR algorithm.")  # creates pop-up text
invert_ttp = CreateToolTip(invert, "Changes all black to white and white to black in the image sent to the OCR algorithm.")

btn = Button(root, text='Open Image', command=open_img)  # initializes the open image button
btn.pack(side="bottom", padx="10", pady="15")
#tip.bind_widget(btn, balloonmsg="Allows for the picture to be changed when in picture mode.")  # creates pop-up text
btn_ttp = CreateToolTip(btn, "Allows for the picture to be changed when in picture mode.")

confslider.set(90)

result = Text(root, height="10")  # initializes the textbox with the results
result.pack(side="bottom", padx="10", pady="10")
#tip.bind_widget(result, balloonmsg="Google search bar of results from the image.")  # creates pop-up text
result_ttp = CreateToolTip(result, "Google search bar of results from the image.")

root.mainloop()  # main event loop that keeps the GUI responsive