Archive for the ‘dlmalloc’ Category

android: dlmalloc and mmap

December 31, 2015

This post discusses dlmalloc and mmap().

reference code base
bionic: android-6.0.0_r7
kernel: linux 4.3

what is dlmalloc
dlmalloc is one implementation of the bionic memory allocator. It has been replaced by jemalloc. Developers can still explicitly choose dlmalloc by adding “MALLOC_IMPL := dlmalloc” in $(android)/device/$(vendor)/$(project)/$(project).mk.

dlmalloc and mmap()
malloc() calls mmap() if needed, and free() calls munmap() if needed.

void *mmap(void *addr, size_t length, int prot , int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);

By default, malloc calls mmap(0, (s), (PROT_READ|PROT_WRITE), (MAP_PRIVATE|MAP_ANONYMOUS), -1, 0). An addr of 0 means that the kernel is free to choose a suitable address range for the mapping. The prot value (PROT_READ|PROT_WRITE) means that the allocated memory can be read and written but cannot be executed. The flags value (MAP_PRIVATE|MAP_ANONYMOUS) means that the mapping is a private copy-on-write mapping and that it is anonymous, i.e. not backed by any file; the contents of its pages are initialized to zero. An fd of -1 is required by some platforms when mapping anonymous pages.

#if HAVE_MMAP
#ifndef WIN32
#define MUNMAP_DEFAULT(a, s)  munmap((a), (s))
#define MMAP_PROT            (PROT_READ|PROT_WRITE)
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS        MAP_ANON
#endif /* MAP_ANON */
#ifdef MAP_ANONYMOUS
#define MMAP_FLAGS           (MAP_PRIVATE|MAP_ANONYMOUS)
#define MMAP_DEFAULT(s)       mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
#else /* MAP_ANONYMOUS */

sys_mmap()
In arm64, sys_mmap() leverages sys_mmap_pgoff().

sys_mmap()
-> sys_mmap_pgoff()
   -> vm_mmap_pgoff()
      -> down_write(&mm->mmap_sem)
      -> do_mmap_pgoff()
         -> do_mmap()
            -> get_unmapped_area()
            -> mmap_region()
      -> up_write(&mm->mmap_sem)

If the MAP_ANONYMOUS flag is set in flags, sys_mmap_pgoff() skips looking up fd and calls vm_mmap_pgoff(file, addr, len, prot, flags, pgoff) with file set to NULL (unless MAP_HUGETLB is also set, in which case a hugetlb file is created for the mapping). Some platforms require fd to be -1 when requesting an anonymous mapping, but Linux doesn’t have this requirement.

1410 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1411                 unsigned long, prot, unsigned long, flags,
1412                 unsigned long, fd, unsigned long, pgoff)
1413 {
1414         struct file *file = NULL;
1415         unsigned long retval = -EBADF;
1416 
1417         if (!(flags & MAP_ANONYMOUS)) {
1418                 audit_mmap_fd(fd, flags);
1419                 file = fget(fd);
1420                 if (!file)
1421                         goto out;
1422                 if (is_file_hugepages(file))
1423                         len = ALIGN(len, huge_page_size(hstate_file(file)));
1424                 retval = -EINVAL;
1425                 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1426                         goto out_fput;
1427         } else if (flags & MAP_HUGETLB) {
1428                 struct user_struct *user = NULL;
1429                 struct hstate *hs;
1430 
1431                 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
1432                 if (!hs)
1433                         return -EINVAL;
1434 
1435                 len = ALIGN(len, huge_page_size(hs));
1436                 /*
1437                  * VM_NORESERVE is used because the reservations will be
1438                  * taken when vm_ops->mmap() is called
1439                  * A dummy user value is used because we are not locking
1440                  * memory so no accounting is necessary
1441                  */
1442                 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1443                                 VM_NORESERVE,
1444                                 &user, HUGETLB_ANONHUGE_INODE,
1445                                 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1446                 if (IS_ERR(file))
1447                         return PTR_ERR(file);
1448         }
1449 
1450         flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1451 
1452         retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1453 out_fput:
1454         if (file)
1455                 fput(file);
1456 out:
1457         return retval;
1458 }

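Since the mapping is requested with MAP_PRIVATE | MAP_ANONYMOUS, file is NULL and do_mmap() takes the MAP_PRIVATE case of the no-file branch, so the corresponding vma of this mapping doesn’t have VM_SHARED | VM_MAYSHARE set in its vm_flags.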

1261  * The caller must hold down_write(&current->mm->mmap_sem).
1262  */
1263 unsigned long do_mmap(struct file *file, unsigned long addr,
1264                         unsigned long len, unsigned long prot,
1265                         unsigned long flags, vm_flags_t vm_flags,
1266                         unsigned long pgoff, unsigned long *populate)
1267 {
1268         struct mm_struct *mm = current->mm;
1269 
1270         *populate = 0;
1271 
1272         if (!len)
1273                 return -EINVAL;
1274 
1275         /*
1276          * Does the application expect PROT_READ to imply PROT_EXEC?
1277          *
1278          * (the exception is when the underlying filesystem is noexec
1279          *  mounted, in which case we dont add PROT_EXEC.)
1280          */
1281         if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1282                 if (!(file && path_noexec(&file->f_path)))
1283                         prot |= PROT_EXEC;
1284 
1285         if (!(flags & MAP_FIXED))
1286                 addr = round_hint_to_min(addr);
1287 
1288         /* Careful about overflows.. */
1289         len = PAGE_ALIGN(len);
1290         if (!len)
1291                 return -ENOMEM;
1292 
1293         /* offset overflow? */
1294         if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1295                 return -EOVERFLOW;
1296 
1297         /* Too many mappings? */
1298         if (mm->map_count > sysctl_max_map_count)
1299                 return -ENOMEM;
1300 
1301         /* Obtain the address to map to. we verify (or select) it and ensure
1302          * that it represents a valid section of the address space.
1303          */
1304         addr = get_unmapped_area(file, addr, len, pgoff, flags);
1305         if (addr & ~PAGE_MASK)
1306                 return addr;
1307 
1308         /* Do simple checking here so the lower-level routines won't have
1309          * to. we assume access permissions have been handled by the open
1310          * of the memory object, so we don't do any here.
1311          */
1312         vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
1313                         mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1314 
1315         if (flags & MAP_LOCKED)
1316                 if (!can_do_mlock())
1317                         return -EPERM;
1318 
1319         if (mlock_future_check(mm, vm_flags, len))
1320                 return -EAGAIN;
1321 
1322         if (file) {
1323                 struct inode *inode = file_inode(file);
1324 
1325                 switch (flags & MAP_TYPE) {
1326                 case MAP_SHARED:
1327                         if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1328                                 return -EACCES;
1329 
1330                         /*
1331                          * Make sure we don't allow writing to an append-only
1332                          * file..
1333                          */
1334                         if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1335                                 return -EACCES;
1336 
1337                         /*
1338                          * Make sure there are no mandatory locks on the file.
1339                          */
1340                         if (locks_verify_locked(file))
1341                                 return -EAGAIN;
1342 
1343                         vm_flags |= VM_SHARED | VM_MAYSHARE;
1344                         if (!(file->f_mode & FMODE_WRITE))
1345                                 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1346 
1347                         /* fall through */
1348                 case MAP_PRIVATE:
1349                         if (!(file->f_mode & FMODE_READ))
1350                                 return -EACCES;
1351                         if (path_noexec(&file->f_path)) {
1352                                 if (vm_flags & VM_EXEC)
1353                                         return -EPERM;
1354                                 vm_flags &= ~VM_MAYEXEC;
1355                         }
1356 
1357                         if (!file->f_op->mmap)
1358                                 return -ENODEV;
1359                         if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1360                                 return -EINVAL;
1361                         break;
1362 
1363                 default:
1364                         return -EINVAL;
1365                 }
1366         } else {
1367                 switch (flags & MAP_TYPE) {
1368                 case MAP_SHARED:
1369                         if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1370                                 return -EINVAL;
1371                         /*
1372                          * Ignore pgoff.
1373                          */
1374                         pgoff = 0;
1375                         vm_flags |= VM_SHARED | VM_MAYSHARE;
1376                         break;
1377                 case MAP_PRIVATE:
1378                         /*
1379                          * Set pgoff according to addr for anon_vma.
1380                          */
1381                         pgoff = addr >> PAGE_SHIFT;
1382                         break;
1383                 default:
1384                         return -EINVAL;
1385                 }
1386         }
1387 
1388         /*
1389          * Set 'VM_NORESERVE' if we should not account for the
1390          * memory use of this mapping.
1391          */
1392         if (flags & MAP_NORESERVE) {
1393                 /* We honor MAP_NORESERVE if allowed to overcommit */
1394                 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1395                         vm_flags |= VM_NORESERVE;
1396 
1397                 /* hugetlb applies strict overcommit unless MAP_NORESERVE */
1398                 if (file && is_file_hugepages(file))
1399                         vm_flags |= VM_NORESERVE;
1400         }
1401 
1402         addr = mmap_region(file, addr, len, vm_flags, pgoff);
1403         if (!IS_ERR_VALUE(addr) &&
1404             ((vm_flags & VM_LOCKED) ||
1405              (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1406                 *populate = len;
1407         return addr;
1408 }

conclusion
This post discusses how the dlmalloc implementation of malloc() calls mmap().


%d bloggers like this: