I am trying to detect products in an image and label them with their class names using the code below. I am using the cv2.putText() function, but I get the error shown below. Could anyone please help me?
from cProfile import label
from tkinter import font
import cv2
import numpy as np
# Load the YOLOv3 network and the class names, one name per line of coco.names.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Ask the network for its output layer names directly. Indexing the result of
# getUnconnectedOutLayers() with i[0] only works on OpenCV versions that return
# an Nx1 array; newer versions return a flat array and i[0] raises.
output_layers = net.getUnconnectedOutLayersNames()

img = cv2.imread("amz.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("could not read image 'amz.jpg'")
img = cv2.resize(img, None, fx=0.9, fy=0.9)
height, width = img.shape[:2]

blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
# Debug view: show every channel of every image in the blob in its own window.
for b in blob:
    for n, img_blob in enumerate(b):
        cv2.imshow(str(n), img_blob)

net.setInput(blob)
outp = net.forward(output_layers)
print(outp)

# Keep every detection whose best class score exceeds 0.5.
class_ids = []
boxes = []
for out in outp:
    for detection in out:
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = scores[class_id]
        if confidence > 0.5:
            # detection[0:4] is scaled by the image size to get the box
            # centre and dimensions in pixels.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            class_ids.append(class_id)

text_font = cv2.FONT_HERSHEY_SIMPLEX
for (x, y, w, h), class_id in zip(boxes, class_ids):
    # The text handed to putText must be a str. The original code assigned the
    # class name to `lable` but passed `label`, which is the function brought
    # in by `from cProfile import label`; that is what made putText fail with
    # "SystemError: returned NULL without setting an error".
    class_name = classes[class_id]
    print(class_name)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # bottomLeftOrigin is left at its default (False); passing True renders
    # the text vertically flipped on a normal top-left-origin image.
    cv2.putText(img, class_name, (x, y + 30), text_font, 1, (0, 0, 0), 3, cv2.LINE_AA)

cv2.imshow("Image", img)
cv2.waitKey(10000)
error:
Traceback (most recent call last):
File «C:/Users/Gajapati/PycharmProjects/yolo/yolo-opencv.py», line 59, in
cv2.putText(img, label, (x, y + 30), font, 1, (0, 0, 0), 3, cv2.LINE_AA, True)
SystemError: returned NULL without setting an error
I am trying to detect products in an image and label them with their class names using the code below. I am using the cv2.putText() function, but I get the error shown below. Could anyone please help me?
from cProfile import label
from tkinter import font
import cv2
import numpy as np
# Load the YOLOv3 network and the class names, one name per line of coco.names.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Ask the network for its output layer names directly. Indexing the result of
# getUnconnectedOutLayers() with i[0] only works on OpenCV versions that return
# an Nx1 array; newer versions return a flat array and i[0] raises.
output_layers = net.getUnconnectedOutLayersNames()

img = cv2.imread("amz.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("could not read image 'amz.jpg'")
img = cv2.resize(img, None, fx=0.9, fy=0.9)
height, width = img.shape[:2]

blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
# Debug view: show every channel of every image in the blob in its own window.
for b in blob:
    for n, img_blob in enumerate(b):
        cv2.imshow(str(n), img_blob)

net.setInput(blob)
outp = net.forward(output_layers)
print(outp)

# Keep every detection whose best class score exceeds 0.5.
class_ids = []
boxes = []
for out in outp:
    for detection in out:
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = scores[class_id]
        if confidence > 0.5:
            # detection[0:4] is scaled by the image size to get the box
            # centre and dimensions in pixels.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            class_ids.append(class_id)

text_font = cv2.FONT_HERSHEY_SIMPLEX
for (x, y, w, h), class_id in zip(boxes, class_ids):
    # The text handed to putText must be a str. The original code assigned the
    # class name to `lable` but passed `label`, which is the function brought
    # in by `from cProfile import label`; that is what made putText fail with
    # "SystemError: returned NULL without setting an error".
    class_name = classes[class_id]
    print(class_name)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # bottomLeftOrigin is left at its default (False); passing True renders
    # the text vertically flipped on a normal top-left-origin image.
    cv2.putText(img, class_name, (x, y + 30), text_font, 1, (0, 0, 0), 3, cv2.LINE_AA)

cv2.imshow("Image", img)
cv2.waitKey(10000)
error:
Traceback (most recent call last):
File «C:/Users/Gajapati/PycharmProjects/yolo/yolo-opencv.py», line 59, in
cv2.putText(img, label, (x, y + 30), font, 1, (0, 0, 0), 3, cv2.LINE_AA, True)
SystemError: returned NULL without setting an error
System information (version)
- OpenCV => 4.1.1 (Nvidia Jetson Xavier default)
- Operating System / Platform => Ubuntu 18.04 (arm64)
- Compiler => gcc
Detailed description
Hello,
I am using a clone of yolov5 from the following address.
https://github.com/ultralytics/yolov5
For inference I used rtsp as the source as shown next.
python3 detect.py --source rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa
Fusing layers…
Model Summary: 232 layers, 7459581 parameters, 0 gradients
1/1: rtsp://root:root@10.10.237.2/cam0_2… success (320×240 at 0.00 FPS).
0: 480×640 3 persons, 1 tvs, Done. (7.801s)
Traceback (most recent call last):
File «detect.py», line 172, in
detect()
File «detect.py», line 118, in detect
cv2.imshow(p, im0)
SystemError: returned NULL without setting an error
terminate called without an active exception
The source code for that part is as follows.
================= source code =============================
import argparse
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh,
strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized
def detect(save_img=False):
    """Run the model over every item of ``opt.source`` and show/save the results.

    All settings are read from the module-level ``opt`` namespace produced by
    the argument parser. For numeric, ``.txt``, rtsp/rtmp/http sources the
    frames come from ``LoadStreams`` and are always displayed; otherwise they
    come from ``LoadImages`` and are always saved under the run directory.

    Args:
        save_img: Request saving of annotated images/videos. Forced to True
            for non-stream sources.

    Raises:
        StopIteration: When 'q' is pressed in a display window.
    """
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled: classify is hard-coded to False)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            # Terminate each record with a real newline so
                            # every detection lands on its own line.
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # The window name must be a str: handing cv2.imshow the Path
                # object `p` fails with "SystemError: returned NULL without
                # setting an error".
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
    # Build the command-line interface; the parsed namespace is stored in the
    # module-level name `opt`, which detect() reads its settings from.
    cli = argparse.ArgumentParser()
    add_option = cli.add_argument
    add_option('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    add_option('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
    add_option('--img-size', type=int, default=640, help='inference size (pixels)')
    add_option('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    add_option('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    add_option('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add_option('--view-img', action='store_true', help='display results')
    add_option('--save-txt', action='store_true', help='save results to *.txt')
    add_option('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    add_option('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    add_option('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    add_option('--augment', action='store_true', help='augmented inference')
    add_option('--update', action='store_true', help='update all models')
    add_option('--project', default='runs/detect', help='save results to project/name')
    add_option('--name', default='exp', help='save results to project/name')
    add_option('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = cli.parse_args()
    print(opt)

    # Inference only, so gradient tracking is switched off for the whole run.
    with torch.no_grad():
        if not opt.update:
            detect()
        else:
            # update all models (to fix SourceChangeWarning): run each
            # checkpoint once, then strip its optimizer state.
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
====================================================================
source: https://github.com/ultralytics/yolov5/blob/master/detect.py
Thank you.
Steps to reproduce
- clone https://github.com/ultralytics/yolov5
- pip install -r requirements.txt
- $ python detect.py --source 0 # webcam
file.jpg # image
file.mp4 # video
path/ # directory
path/*.jpg # glob
rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa # rtsp stream
rtmp://192.168.1.105/live/test # rtmp stream
http://112.50.243.8/PLTV/88888888/224/3221225900/1.m3u8 # http stream
Issue submission checklist
- I report the issue, it’s not a question
- I checked the problem with documentation, FAQ, open issues,
answers.opencv.org, Stack Overflow, etc and have not found solution
- I updated to latest OpenCV version and the issue is still there
- There is reproducer code and related data files: videos, images, onnx, etc
Tools: Python3.7 (64 bit), Visual C++ 10.0
I am trying to create a C extension for Python. To start, I am testing a simple C code which prints a string and invokes the Sleep() function inside a for loop. However, when I make a simple call to this C function, named gen_nums, from Python, I get the following error:
“SystemError: built-in function gen_nums returned NULL without setting an error”
I think the problem is with the Sleep() function; deleting the "Sleep(1000)" part or placing it before printf("Printed from C thread...\n") eliminates this error. I looked over the documentation for Sleep() but couldn't find anything useful.
C Code:
/*
 * threadrun: minimal CPython extension module exposing gen_nums().
 *
 * Layout and the "\n" escape are restored here; the original paste had the
 * whole program on the #include line, which is not valid C.
 */
#include <Python.h>
#include <windows.h>  /* Sleep() */

/*
 * Print a message ten times, pausing one second after each.
 *
 * NOTE(review): this is the code as asked about, so its prototype is left
 * unchanged. It is NOT a valid extension function: CPython calls method-table
 * entries as PyObject *(*)(PyObject *, PyObject *), and this function takes
 * no arguments and returns nothing. That mismatch is what produces
 * "SystemError: ... returned NULL without setting an error".
 */
static void gen_nums()
{
    int i;

    for (i = 0; i < 10; i++) {
        printf("Printed from C thread...\n");
        Sleep(1000);
    }
}

/* Method table: one entry per exported function, NULL-terminated. */
static PyMethodDef gen_numsmethods[] = {
    /* Stored without a cast, so the compiler should warn about the
     * incompatible function pointer type here. */
    {"gen_nums", gen_nums, METH_VARARGS, "This is a threading test"},
    {NULL, NULL, 0, NULL}
};

/* Module definition for "threadrun". */
static struct PyModuleDef threadmod = {
    PyModuleDef_HEAD_INIT,
    "threadrun",
    "This is a thread test module",
    -1,
    gen_numsmethods
};

/* Module initialisation entry point looked up by the import machinery. */
PyMODINIT_FUNC PyInit_threadrun(void)
{
    return PyModule_Create(&threadmod);
}
Python Call:
threadrun.gen_nums() the C module is called threadrun
The result should be:
“Printed from C thread…” 10 times, with a 1 second interval between each statement.
However, the program prints the statement 10 times and then displays the aforementioned error.
Advertisement
Answer
The reason for the error is this: Python extension functions must have a certain C prototype:
PyObject *func(PyObject *self, PyObject *args)
The method slots contain function pointers of type
PyObject *(*)(PyObject *, PyObject *)
The old way was to forcibly cast the function to this pointer type to be stored into the method slot. The explicit cast will silence the error of conversion of void (*)()
to PyObject *(*)(PyObject *, PyObject *)
. The conversion is valid, but needs an explicit cast. If an explicit cast is not there, then a C compiler must issue a diagnostics message.
Your code does not have an explicit cast, hence you must get a warning for
{"gen_nums", gen_nums, METH_VARARGS, "This is a threading test"},
In any case, if there were an explicit cast, the program would still be a correct program up until when Python tries to invoke your function gen_nums()
, because Python will do so as if its prototype were
PyObject *gen_nums(PyObject *, PyObject *);
Now the C standard says that while everything was fine up to this point, from now on the behaviour of the program is undefined, because C11 6.3.2.3:
- A pointer to a function of one type may be converted to a pointer to a function of another type and back again; the result shall compare equal to the original pointer. If a converted pointer is used to call a function whose type is not compatible with the referenced type, the behavior is undefined.
Your function returns void, i.e. nothing at all, yet you ask why it “returns NULL”
only when Sleep()
is in there. The reason is “undefined behaviour”.
As for how to fix this, please do read and understand the Chapter 1 of 1. Extending Python with C or C++ – there are plenty of details in there, but everything needed to fix this simple function is detailed in there. If you get stuck please do ask further questions but do refer to the documentation in questions.
The fix for that function would be to write it as
/*
 * Print a message ten times, pausing one second after each.
 *
 * Uses the prototype CPython expects for a METH_VARARGS function and returns
 * a new reference to None, so the interpreter never sees a NULL result
 * without an exception set. Both parameters are unused.
 */
static PyObject *gen_nums(PyObject *self, PyObject *args)
{
    int i;

    for (i = 0; i < 10; i++) {
        printf("Printed from C thread...\n");
        Sleep(1000);
    }
    Py_RETURN_NONE;
}
9 People found this is helpful
Created on 2016-09-09 07:57 by mbussonn, last changed 2022-04-11 14:58 by admin. This issue is now closed.
I've been able to reliably trigger dict.pop to raise a system error: See https://github.com/pytest-dev/pytest/issues/1925 I have issues getting a small test-case that triggers it, though by changing the pop method of dict to print the returned value: PyObject * _PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt){ ... printf("nreturn End value %xn", old_value ); return old_value; } I've been able to see that the last return sometime return Null: [snip] return End value 0 Fatal Python error: a function returned NULL without setting an error SystemError: <built-in method pop of dict object at 0x10473f238> returned NULL without setting an error Current thread 0x00007fff77139300 (most recent call first): File "/usr/local/lib/python3.6/site-packages/_pytest/capture.py", line 83 in reset_capturings File "/usr/local/lib/python3.6/site-packages/_pytest/config.py", line 869 in _ensure_unconfigure File "/usr/local/lib/python3.6/site-packages/_pytest/main.py", line 121 in wrap_session File "/usr/local/lib/python3.6/site-packages/_pytest/main.py", line 125 in pytest_cmdline_main File "/usr/local/lib/python3.6/site-packages/_pytest/vendored_packages/pluggy.py", line 596 in execute File "/usr/local/lib/python3.6/site-packages/_pytest/vendored_packages/pluggy.py", line 333 in <lambda> File "/usr/local/lib/python3.6/site-packages/_pytest/vendored_packages/pluggy.py", line 338 in _hookexec File "/usr/local/lib/python3.6/site-packages/_pytest/vendored_packages/pluggy.py", line 724 in __call__ File "/usr/local/lib/python3.6/site-packages/_pytest/config.py", line 57 in main File "/usr/local/lib/python3.6/site-packages/pytest.py", line 17 in <module> File "/Users/bussonniermatthias/dev/git-cpython/Lib/runpy.py", line 85 in _run_code File "/Users/bussonniermatthias/dev/git-cpython/Lib/runpy.py", line 193 in _run_module_as_main Aborted Which I suppose is not desirable. 
I'm quite uncomfortable with C so I'm far from being able to propose a patch or describe why this would happen... Victor Stinner seem to have made the last changes to these methods in http://bugs.python.org/issue27350 . Not sure if the etiquette is to add them to the nosy list in this case. Discovered because of nightly continuous integration with travis on github.com/xonsh/xonsh
Oh, also I've bisect it to the compact-dict commit, and the exact instance of __dict__.pop that fails depends on the machine. Reliable same location on travis-ci and locally, but travis-ci location and local differ on where it triggers this.
I've forgot to convert split table into combined table when del and .pop(). I'm sorry, and thanks to finding it.
Are you sure INADA? The previous dict implementation has the same constraint but does merge in dict_pop.
does should be does not. Sorry.
> Xiang Zhang added the comment: > > Are you sure INADA? The previous dict implementation has the same constraint but does merge in dict_pop. > Yes. New dict implementation preserves insertion order. class A: ... a, b = A(), A() a.a, a.b, a.c = 1, 2, 3 b.a, b.b, b.c = 4, 5, 6 del a.b # or a.pop('b') a.b = 7 assert list(a.__dict__) == ["a", "c", "b"] assert list(b.__dict__) == ["a", "b", "c"] This is difficult for key-sharing dict (aka. split table). We may be able to allow some simple del and pop operation for split table. But we should keep best balance between simplicity and efficiency. And we don't have enough time before 3.6b1.
> I'm sorry, and thanks to finding it. No worries, that's what nightly are for right ? And I only dug up a failing tests :-) Thanks to you for writing this ! > [snip] > This is difficult for key-sharing dict (aka. split table). I'm not going to pretend I understand the details, though I applied the latest patch: fix-compact-dict-deletion.patch methane, 2016-09-09 04:11 And it does fix this issue for me (at least on my local machine) Looking at the code the only thing I see is that you seem to return NULL in Pop and -1 in Clear. Not sure if this what you meant, I'm un familar with all this; but thanks a lot again for the your work and the quick response.
I would really appreciate if someone can write an unit test. I'm not confident to fix a bug without unit test, on such tricky part of Python internals (splitted/combined dict).
Added test which reproduce the issue on current master.
New changeset 4a5b61b0d090 by Victor Stinner in branch 'default': Fix SystemError in compact dict https://hg.python.org/cpython/rev/4a5b61b0d090
> Added test which reproduce the issue on current master. Oh, great :-) Thanks for the quick fix AND test. I pushed your latest change.
I'd like to reopen this one since I still don't think this change is needed and suggest to revert since this change make split table combined on deletion. Deletion will not alter split dict's order. Only insertion after deletion will. But this case is already handled in insertdict[0]. So I think we don't have to combine once an item is deleted from split dict. The example INADA gives works well with the previous implementation(before this change). The problem there is a SystemError I think is dict.pop() doesn't handle pending state (del dict does, so you can produce the same failure when try del dict[]). We only add `|| *value_addr == NULL` as delitem does. poc.patch reverts the change (preserve the tests, eliminating the size compare part) and add pending state handling in dict.pop(). It passes. And if you remove the pending state handling, you can product the SystemError. I'd like to know if my idea is totally wrong. :) [0] https://hg.python.org/cpython/file/tip/Objects/dictobject.c#l1057
Ohh, mistakes. > so you can produce can should be can not > We only add We only need to add > you can product the product should be produce
Xiang: Preserving insertion order in shared-key dict is possible, but it's hard. If we allow it, we should add tons of test for shared-key dict. I'll describe why it's harder than you think. Current implementation allow insertion to split table only when: /* When insertion order is different from shared key, we can't share * the key anymore. Convert this instance to combine table. */ if (_PyDict_HasSplitTable(mp) && ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) || (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) { if (insertion_resize(mp) < 0) { `ix >= 0 && *value_addr == NULL` means it's pending slot. `mp->ma_used == ix` ensure insertion order. And if we allow deletion, this check will be broken. For example: a, b = C() a.a, a.b, a.c = 1, 2, 3 # shared-key order b.a, b.b, b.c = 1, 2, 3 # same order, no problem del b.a # mp->ma_used = 2 del b.b # mp->ma_used = 1 b.b = 42 # It's OK because mp->ma_used == ix == 1. Keep sharing. assert(list(b.__dict__.keys()) == ["c", "b"]) # AssertionError! Actual order is [(PENDING "a",) "b", "c"]
> add tons of test for shared-key dict So what if we delete mp->ma_used == ix or use mp->ma_keys->dk_nentries == ix? Do we still have any case breaking the order?
Xiang: I will remove the now useless check, but after beta 1. This issue is about SystemError and you now want to reuse the issue to implement an optimization. Please open a new issue for supporting deletion on split table if you consider that it's possible and that it would be a good idea.
The Blaze test suite segfaults with 4a5b61b0d090.
> The Blaze test suite segfaults with 4a5b61b0d090. What is the Blaze test suite? Are you sure that you really recompiled Python from scratch? Try: make distclean && ./configure --with-pydebug && make
Yes, I'm sure. I even cloned a fresh repo. Also, I tested in release mode (not --with-pydebug). https://github.com/blaze/blaze
The immediate question is: is this serious enough to block 3.6.0b1 or can it wait for b2? The b1 bits are just about ready to be published.
I'm not sure. In the Blaze test suite, 1e7b636b6009 has the SystemError. 4a5b61b0d090 segfaults. Blaze is pushing Python's dynamic capabilities to absolute limits, so perhaps this is specific to a few applications only. For Blaze *itself* there is no difference whether this is fixed now or in the next beta. So releasing 3.6.0b1 now and setting this back to blocker afterwards sounds good to me.
OK, thanks, Stefan! OK with you, Victor?
If Victor can't reply now (it's getting late in Europe), I'd just release. Pretend that I set it to deferred blocker. :)
Yes, he's had a *long* day. I'll take your advice, Stefan. Thanks.
I didn't see any segfault on the Python test suite on buildbots. It's either a bug in Blaze (Python C API change like METH_CALL) or a real bug in CPython. It's a beta, we can fix bugs later :-)
It could still be a stack overflow, but on the surface it does not look like one. It's definitely related to the aforementioned revision: ==3442== Invalid read of size 8 ==3442== at 0x49DBD8: _PyDict_Pop (dictobject.c:1743) ==3442== by 0x4A0BE2: dict_pop (dictobject.c:2732) ==3442== by 0x4AA5F8: _PyCFunction_FastCallDict (methodobject.c:229) ==3442== by 0x4AA70B: _PyCFunction_FastCallKeywords (methodobject.c:267) ==3442== by 0x55FE63: call_function (ceval.c:4794) ==3442== by 0x55AA82: _PyEval_EvalFrameDefault (ceval.c:3267) ==3442== by 0x54D9CC: PyEval_EvalFrameEx (ceval.c:718) ==3442== by 0x560123: _PyFunction_FastCall (ceval.c:4876) ==3442== by 0x56023B: fast_function (ceval.c:4906) ==3442== by 0x55FF91: call_function (ceval.c:4815) ==3442== by 0x55AA82: _PyEval_EvalFrameDefault (ceval.c:3267) ==3442== by 0x54D9CC: PyEval_EvalFrameEx (ceval.c:718) ==3442== Address 0x0 is not stack'd, malloc'd or (recently) free'd ==3442== ==3442== ==3442== Process terminating with default action of signal 11 (SIGSEGV) ==3442== Access not within mapped region at address 0x0 ==3442== at 0x49DBD8: _PyDict_Pop (dictobject.c:1743) ==3442== by 0x4A0BE2: dict_pop (dictobject.c:2732) ==3442== by 0x4AA5F8: _PyCFunction_FastCallDict (methodobject.c:229) ==3442== by 0x4AA70B: _PyCFunction_FastCallKeywords (methodobject.c:267) ==3442== by 0x55FE63: call_function (ceval.c:4794) ==3442== by 0x55AA82: _PyEval_EvalFrameDefault (ceval.c:3267) ==3442== by 0x54D9CC: PyEval_EvalFrameEx (ceval.c:718) ==3442== by 0x560123: _PyFunction_FastCall (ceval.c:4876) ==3442== by 0x56023B: fast_function (ceval.c:4906) ==3442== by 0x55FF91: call_function (ceval.c:4815) ==3442== by 0x55AA82: _PyEval_EvalFrameDefault (ceval.c:3267) ==3442== by 0x54D9CC: PyEval_EvalFrameEx (ceval.c:718)
> So what if we delete mp->ma_used == ix or use mp->ma_keys->dk_nentries == ix? Do we still have any case breaking the order? Yes. `mp->ma_used == ix` means no more guard about key ordering. class C: ... a, b = C() a.a, a.b = 1, 2 # shared key order is [a, b] # b has [a, b] pending slot too, and no guard when inserting pending slot. b.b, b.a = 3, 4 # continue to using sharing key assert list(b.__dict__.keys()) == ['b', 'a']) # AssertionError! --- `mp->ma_keys->dk_nentries == ix` is nonsense. It prohibits to inserting pending slot: a, b = C() a.a, a.b = 1, 2 # shared key order is [a, b] # Since ma_keys is shared, b's dk_nentries == 2 b.a = 3 # ix = 0, dk_nentries = 2; stop using sharing keys. --- To keep using sharing key, my idea is adding one more member to dict: mp->ma_values_end. When inserting to pending or empty slot, check that `ix >= mp_ma_values_end` to ensure ordering and `set mp->ma_values_end = ix+1` if it's OK. a, b = C() # Both of ma_values_end = 0 a.a, a.b = 1, 2 # shared key order is [a, b], a's ma_values_end = 2 b.b = 3 # ma_values_end (=0) <= ix (=1); OK; set ma_values_end = 2 b.a = 4 # ma_values_end (=2) > ix (=0); NG; convert to combined table But testing such an implementation detail is hard from pure Python. (No API for checking ma_values_end, the dict is split or combined, and two dict share keys). I don't know adding such hack is worth enough. Dict is made by tons of hacks, you know. I think new OrderedDict implementation based on new dict implementation is more worth. If dict is far more efficient than OrderedDict, people may use dict even when OrderedDict should be used. But I don't know it can be done after beta1.
> assert list(b.__dict__.keys()) == ['b', 'a']) # AssertionError! No, there is no error. Once an pending slot is encountered it is combined. But inserting pending slots is prohibited and it's far from ideal. We need to know if the pending slot is the last entry or not. But it seems we can't achieve it using the current implementation. Sad. :( In short, I give up. Sorry for the noise.
New changeset 579141d6e353 by Victor Stinner in branch 'default': Issue #28040: Cleanup find_empty_slot() https://hg.python.org/cpython/rev/579141d6e353
Stefan Krah: "It could still be a stack overflow, but on the surface it does not look like one. It's definitely related to the aforementioned revision: (...)" The initial bug reported in this issue is now fixed. Even if the Blaze issue is related, I would really prefer to discuss it in a different issue, so I created #28120: "The Blaze test suite segfaults with 4a5b61b0d090". @Xiang: I simplified find_empty_slot() to remove code to support split table. Again, if you want to support deletion in split table, please open a new issue. I now close this issue. Thanks for the report Matthias! Thanks for the quick fix and the unit test Naoki!
I don't understand how the original issue is fixed if 1e7b636b6009 exhibits the SystemError and the very next revision 4a5b61b0d090 (the fix) segfaults. And the test suite works with the previous dict. Sure, it can be a third party issue, but this constellation *does* seem pretty strange. Happy to discuss it in another issue though.
Stefan Krah: "I don't understand how the original issue is fixed if 1e7b636b6009 exhibits the SystemError" "SystemError: returned NULL without setting an error" is a generic error, basically it says that a C function has bug :-) It's hard to know exactly which C function has the issue. "and the very next revision 4a5b61b0d090 (the fix) segfaults" A segfault is not a SystemError: it's a new bug. "Sure, it can be a third party issue, but this constellation *does* seem pretty strange." In case of doubt, I prefer to open a new issue. Please let's continue in the issue #28120 (see my questions there!).
New changeset 3c7456e28777 by Victor Stinner in branch '3.6': Issue #28040: Cleanup find_empty_slot() https://hg.python.org/cpython/rev/3c7456e28777
+ msg276274
+ msg276223
+ msg276222
+ msg276206
+ msg276202
+ msg276200
+ msg276141
+ msg276125
+ msg276120
+ msg276116
messages:
+ msg276115
+ msg276110
+ msg276108
+ msg276107
+ msg276106
+ msg276104
nosy:
+ ned.deily
+ skrah
messages:
+ msg276101
+ msg276090
+ msg276085
+ msg276083
+ msg276081
files:
+ poc.patch
messages:
+ msg276080
resolution: fixed
messages:
+ msg275558
+ python-dev
messages:
+ msg275554
+ msg275535
+ fix-splittable-pop.patch
+ msg275322
— msg275292
— issue28040.patch
+ msg275320
+ msg275317
+ msg275312
+ msg275311
+ fix-compact-dict-deletion.patch
+ fix-compact-dict-deletion.patch
messages:
+ msg275304
+ issue28040.patch
keywords:
+ patch
+ xiang.zhang
messages:
+ msg275292
+ vstinner, methane
+ msg275286