The Pedigree Project  0.1
PosixSubsystem.cc
1 /*
2  * Copyright (c) 2008-2014, Pedigree Developers
3  *
4  * Please see the CONTRIB file in the root of the source tree for a full
5  * list of contributors.
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "pedigree/kernel/Log.h"
21 #include <PosixSubsystem.h>
22 
23 #include "pedigree/kernel/process/PerProcessorScheduler.h"
24 #include "pedigree/kernel/process/Scheduler.h"
25 #include "pedigree/kernel/process/SignalEvent.h"
26 #include "pedigree/kernel/process/Thread.h"
27 #include "pedigree/kernel/process/Uninterruptible.h"
28 #include "pedigree/kernel/processor/PhysicalMemoryManager.h"
29 #include "pedigree/kernel/processor/Processor.h"
30 #include "pedigree/kernel/processor/state.h"
31 #include "pedigree/kernel/processor/types.h"
32 #include "pedigree/kernel/syscallError.h"
33 
34 #include "pedigree/kernel/LockGuard.h"
36 #include "pedigree/kernel/utilities/Tree.h"
37 
38 #include "pedigree/kernel/utilities/assert.h"
39 
40 #include "FileDescriptor.h"
41 #include "PosixProcess.h"
42 #include "logging.h"
43 
44 #include "modules/system/linker/DynamicLinker.h"
45 #include "modules/system/vfs/File.h"
46 #include "modules/system/vfs/LockedFile.h"
48 #include "modules/system/vfs/Symlink.h"
49 #include "modules/system/vfs/VFS.h"
50 #include "pedigree/kernel/linker/Elf.h"
51 
52 #include "file-syscalls.h"
53 
54 #include <signal.h>
55 
56 #include <vdso.h> // Header with the vdso.so binary in it.
57 
// Symbol declared here and defined elsewhere; nothing in the visible part of
// this file references it.
58 extern char __posix_compat_vsyscall_base;
59 
// Fixed address constant; presumably where the vsyscall compatibility page is
// expected to live - TODO confirm against the code that uses it.
60 #define POSIX_VSYSCALL_ADDRESS 0xffffffffff600000
61 
// Local definitions of the open() access-mode values (presumably matching the
// userspace <fcntl.h> definitions - verify).
62 #define O_RDONLY 0
63 #define O_WRONLY 1
64 #define O_RDWR 2
65 
// Descriptor flag bit; tested against FileDescriptor::fdflags further down
// when only close-on-exec descriptors are to be freed.
66 #define FD_CLOEXEC 1
67 
70 
// Storage for the static ProcessGroupManager::m_Instance member.
71 ProcessGroupManager ProcessGroupManager::m_Instance;
72 
// Declared here and defined elsewhere; neither is called in the visible part
// of this file.
73 extern void pedigree_init_sigret();
74 extern void pedigree_init_pthreads();
75 
76 ProcessGroupManager::ProcessGroupManager() : m_GroupIds()
77 {
78  m_GroupIds.set(0);
79 }
80 
81 ProcessGroupManager::~ProcessGroupManager()
82 {
83 }
84 
// NOTE(review): the line carrying this definition's signature (listing line
// 85) is absent from this copy of the file; presumably a ProcessGroupManager
// member that hands out a fresh group ID - confirm against the declaration.
//
// Claims the first clear bit in m_GroupIds and returns its index.
86 {
87  size_t bit = m_GroupIds.getFirstClear();
88  m_GroupIds.set(bit);
89  return bit;
90 }
91 
// NOTE(review): the signature line (listing line 92) is absent from this
// copy; the log text below names the function setGroupId and the body reads a
// `gid` parameter.
//
// Marks `gid` as in use in m_GroupIds, logging a notice first if that bit was
// already set.
93 {
94  if (m_GroupIds.test(gid))
95  {
96  PS_NOTICE("ProcessGroupManager: setGroupId called on a group ID that "
97  "existed already!");
98  }
// The bit is set whether or not the notice fired.
99  m_GroupIds.set(gid);
100 }
101 
// NOTE(review): the signature line (listing line 102) is absent from this
// copy; the body reads a `gid` parameter.
//
// Returns whether the bit for `gid` is currently set in m_GroupIds.
103 {
104  return m_GroupIds.test(gid);
105 }
106 
// NOTE(review): the signature line (listing line 107) is absent from this
// copy; the body reads a `gid` parameter.
//
// Clears the bit for `gid` in m_GroupIds, making it eligible to be found by a
// later getFirstClear() scan.
108 {
109  m_GroupIds.clear(gid);
110 }
111 
// NOTE(review): the line that opens this definition (listing line 112) is
// absent from this copy; from the initialiser list it looks like a
// PosixSubsystem constructor copying from another subsystem `s` - confirm.
//
// Copies m_NextFd and m_FreeCount from `s`, deep-copies every non-null signal
// handler, and creates a fresh zero-count Semaphore for every key present in
// s.m_ThreadWaiters. No descriptors are copied in the visible lines.
113  : Subsystem(s), m_SignalHandlers(), m_SignalHandlersLock(), m_FdMap(),
114  m_NextFd(s.m_NextFd), m_FdLock(), m_FdBitmap(), m_LastFd(0),
115  m_FreeCount(s.m_FreeCount), m_AltSigStack(), m_SyncObjects(), m_Threads(),
116  m_ThreadWaiters(), m_NextThreadWaiter(1)
117 {
// Take our own handler lock via acquire() and the source's via enter(),
// retrying each until it succeeds.
118  while (!m_SignalHandlersLock.acquire())
119  ;
120  while (!s.m_SignalHandlersLock.enter())
121  ;
122 
123  // Copy all signal handlers
124  for (sigHandlerTree::Iterator it = s.m_SignalHandlers.begin();
125  it != s.m_SignalHandlers.end(); it++)
126  {
127  size_t key = it.key();
128  void *value = it.value();
129  if (!value)
130  continue;
131 
132  SignalHandler *newSig =
133  new SignalHandler(*reinterpret_cast<SignalHandler *>(value));
134  m_SignalHandlers.insert(key, newSig);
135  }
136 
// NOTE(review): listing lines 137-138 are absent here; no release/leave of the
// two locks taken above is visible - confirm that is what the missing lines do.
139 
140  // Copy across waiter state.
// NOTE(review): listing line 141 (the start of this for statement) is absent.
142  it != s.m_ThreadWaiters.end(); ++it)
143  {
144  void *key = it.key();
145 
// Only the key is carried over; the new semaphore always starts at 0.
146  Semaphore *sem = new Semaphore(0);
147  m_ThreadWaiters.insert(key, sem);
148  }
149 
// Overrides the value of 1 given in the initialiser list.
150  m_NextThreadWaiter = s.m_NextThreadWaiter;
151 }
152 
// NOTE(review): the signature line (listing line 153) is absent from this
// copy; the warning text below ("PosixSubsystem object freed") suggests this
// is the PosixSubsystem destructor - confirm.
//
// Tears down, in order: signal handlers, file descriptors, POSIX thread
// records and their thread-specific data, synchronisation objects, and thread
// waiter semaphores; then (in lines partly absent from this copy) works on the
// process's memory mappings under the memory-map lock and a local spinlock.
154 {
// NOTE(review): the decrement lives inside assert(); if assert() compiles to
// nothing in some build, m_FreeCount is not decremented there.
155  assert(--m_FreeCount == 0);
156 
157  acquire();
158 
159  // Destroy all signal handlers
160  for (sigHandlerTree::Iterator it = m_SignalHandlers.begin();
161  it != m_SignalHandlers.end(); it++)
162  {
163  // Get the signal handler and remove it. Note that there shouldn't be
164  // null SignalHandlers, at all.
165  SignalHandler *sig = it.value();
166  assert(sig);
167 
168  // SignalHandler's destructor will delete the Event itself
169  delete sig;
170  }
171 
172  // And now that the signals are destroyed, remove them from the Tree
173  m_SignalHandlers.clear();
174 
175  release();
176 
177  // For sanity's sake, destroy any remaining descriptors
178  freeMultipleFds();
179 
180  // Remove any POSIX threads that might still be lying around
// NOTE(review): listing line 181 (the start of this for statement) is absent.
182  it != m_Threads.end(); it++)
183  {
184  PosixThread *thread = it.value();
185  assert(thread); // There shouldn't have ever been a null PosixThread in
186  // there
187 
188  // If the thread is still running, it should be killed
189  if (!thread->isRunning.tryAcquire())
190  {
191  WARNING(
192  "PosixSubsystem object freed when a thread is still running?");
193  // Thread will just stay running, won't be deallocated or killed
194  }
195 
196  // Clean up any thread-specific data
// NOTE(review): listing line 197 (the start of this inner for statement, which
// declares `it2`) is absent.
198  thread->m_ThreadData.begin();
199  it2 != thread->m_ThreadData.end(); it2++)
200  {
// NOTE(review): this reads `it` (the outer m_Threads iterator, whose value is
// the PosixThread assigned to `thread` above), not `it2`. As written, `p` is
// the PosixThread pointer reinterpreted as a PosixThreadKey, and it is deleted
// once per m_ThreadData entry before `delete thread` below. Looks like `it2`
// was intended - confirm and fix.
201  PosixThreadKey *p = reinterpret_cast<PosixThreadKey *>(it.value());
202  assert(p);
203 
// NOTE(review): listing lines 204-205 are absent here.
206  delete p;
207  }
208 
209  thread->m_ThreadData.clear();
210  delete thread;
211  }
212 
213  m_Threads.clear();
214 
215  // Clean up synchronisation objects
// NOTE(review): listing line 216 (the start of this for statement) is absent.
217  it != m_SyncObjects.end(); it++)
218  {
219  PosixSyncObject *p = it.value();
220  assert(p);
221 
// Only the wrapped object is deleted in the visible lines; `p` itself is not.
// NOTE(review): confirm whether the PosixSyncObject is released elsewhere.
222  if (p->pObject)
223  {
224  if (p->isMutex)
225  delete reinterpret_cast<Mutex *>(p->pObject);
226  else
227  delete reinterpret_cast<Semaphore *>(p->pObject);
228  }
229  }
230 
231  m_SyncObjects.clear();
232 
// NOTE(review): listing line 233 (the start of this for statement) is absent.
234  it != m_ThreadWaiters.end(); ++it)
235  {
236  // Wake up everything waiting and then destroy the waiter object.
237  Semaphore *sem = it.value();
// Releases by the negation of the semaphore's current value.
238  sem->release(-sem->getValue());
239  delete sem;
240  }
241 
// NOTE(review): listing line 242 is absent here.
243 
244  // Take the memory map lock before we become uninterruptible.
245  while (!MemoryMapManager::instance().acquireLock())
246  ;
247 
248  // Spinlock as a quick way of disabling interrupts.
249  Spinlock spinlock;
250  spinlock.acquire();
251 
252  // Switch to the address space of the process we're destroying.
253  // We need to unmap memory maps, and we can't do that in our address space.
254  VirtualAddressSpace &curr =
255  Processor::information().getVirtualAddressSpace();
256  VirtualAddressSpace *va = m_pProcess->getAddressSpace();
257 
258  if (va != &curr)
259  {
260  // Switch into the address space we want to unmap inside.
// NOTE(review): listing line 261 (the statement for this branch) is absent.
262  }
263 
264  // Remove all existing mappings, if any.
// NOTE(review): listing line 265 (the statement this comment describes) is
// absent.
266 
267  if (va != &curr)
268  {
// NOTE(review): listing line 269 (the statement for this branch) is absent.
270  }
271 
272  spinlock.release();
273 
274  // Give back the memory map lock now - we're interruptible again.
// NOTE(review): listing line 275 (the statement this comment describes) is
// absent.
276 }
277 
// NOTE(review): the signature line (listing line 278) is absent from this
// copy; release() below refers to this function as acquire().
//
// Takes both m_FdLock and m_SignalHandlersLock on behalf of the current
// thread and records that thread as the holder. If the current thread is
// already recorded as the holder, returns without taking anything again.
279 {
280  Thread *me = Processor::information().getCurrentThread();
281 
// m_Lock only guards the check of the ownership bookkeeping.
282  m_Lock.acquire();
283  if (m_bAcquired && m_pAcquiredThread == me)
284  {
285  m_Lock.release();
286  return; // already acquired
287  }
288  m_Lock.release();
289 
290  // Ensure that no descriptor operations are taking place (and then, will
291  // take place)
292  while (!m_FdLock.acquire())
293  ;
294 
295  // Modifying signal handlers, ensure that they are not in use
296  while (!m_SignalHandlersLock.acquire())
297  ;
298 
299  // Safe to do without spinlock as we hold the other locks now.
300  m_pAcquiredThread = me;
301  m_bAcquired = true;
302 }
303 
// NOTE(review): the signature line (listing line 304) is absent from this
// copy; presumably the counterpart of acquire() - confirm.
//
// Clears the ownership bookkeeping under m_Lock and releases m_FdLock.
305 {
306  // Opposite order to acquire()
307  m_Lock.acquire();
308  m_bAcquired = false;
309  m_pAcquiredThread = nullptr;
310 
// NOTE(review): listing line 311 is absent here; no release of
// m_SignalHandlersLock is visible - confirm the missing line does that.
312  m_FdLock.release();
313 
314  m_Lock.release();
315 }
316 
// Checks whether the range starting at `addr` and spanning `extent` bytes may
// be accessed in the current processor's virtual address space.
//
//  addr   - start of the range.
//  extent - length in bytes; a zero extent is accepted immediately.
//  flags  - when the SafeWrite bit is set, each probed page's mapping flags
//           are checked as well.
//
// Returns true if every check below passes, false otherwise. When
// POSIX_NO_EFAULT is defined the function returns true unconditionally.
317 bool PosixSubsystem::checkAddress(uintptr_t addr, size_t extent, size_t flags)
318 {
319 #ifdef POSIX_NO_EFAULT
320  return true;
321 #endif
322 
323  Uninterruptible while_checking;
324 
325 #ifdef VERBOSE_KERNEL
326  PS_NOTICE(
327  "PosixSubsystem::checkAddress(" << Hex << addr << ", " << Dec << extent
328  << ", " << Hex << flags << ")");
329 #endif
330 
331  // No memory access expected, all good.
332  if (!extent)
333  {
334 #ifdef VERBOSE_KERNEL
335  PS_NOTICE(" -> zero extent, address is sane.");
336 #endif
337  return true;
338  }
339 
// The caller's return address is only consumed by the VERBOSE_KERNEL log
// below; otherwise `aa` is unused.
340  uintptr_t aa = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
341 #ifdef VERBOSE_KERNEL
342  PS_NOTICE(" -> ret: " << aa);
343 #endif
344 
345  // Check address range.
// NOTE(review): only `addr` is compared against the bounds; addr + extent is
// not, so a range that starts in user space and runs past getKernelStart()
// passes this test - confirm whether that is intended.
346  VirtualAddressSpace &va = Processor::information().getVirtualAddressSpace();
347  if ((addr < va.getUserStart()) || (addr >= va.getKernelStart()))
348  {
349 #ifdef VERBOSE_KERNEL
350  PS_NOTICE(" -> outside of user address area.");
351 #endif
352  return false;
353  }
354 
355  // Short-circuit if this is a memory mapped region.
356  if (MemoryMapManager::instance().contains(addr, extent))
357  {
358 #ifdef VERBOSE_KERNEL
359  PS_NOTICE(" -> inside memory map.");
360 #endif
361  return true;
362  }
363 
364  // Check the range.
// NOTE(review): probes addr + i at page-size strides starting from `addr`
// itself. If `addr` is not page-aligned, the final page touched by the range
// may never be probed (e.g. a 2-byte extent straddling a page boundary probes
// only the first page) - confirm and consider aligning the start down.
365  for (size_t i = 0; i < extent; i += PhysicalMemoryManager::getPageSize())
366  {
367  void *pAddr = reinterpret_cast<void *>(addr + i);
368  if (!va.isMapped(pAddr))
369  {
370 #ifdef VERBOSE_KERNEL
371  PS_NOTICE(" -> page " << Hex << pAddr << " is not mapped.");
372 #endif
373  return false;
374  }
375 
376  if (flags & SafeWrite)
377  {
378  size_t vFlags = 0;
379  physical_uintptr_t phys = 0;
380  va.getMapping(pAddr, phys, vFlags);
381 
// NOTE(review): listing line 383 (the remainder of this condition) is absent
// from this copy.
382  if (!(vFlags & (VirtualAddressSpace::Write |
384  {
385 #ifdef VERBOSE_KERNEL
386  PS_NOTICE(" -> not writeable.");
387 #endif
388  return false;
389  }
390  }
391  }
392 
393 #ifdef VERBOSE_KERNEL
394  PS_NOTICE(" -> mapped and available.");
395 #endif
396  return true;
397 }
398 
// Terminates the process that owns the current thread.
//
//  code - exit code; its low 8 bits are stored, shifted left by 8, as the
//         process exit status when the existing status is 0, 0x7F or 0xFF.
//
// Marks the process as terminating, unwinds nested event state levels if the
// thread is above level 1, deletes the process's linker, removes POSIX
// process-group membership, raises a Child exception on the parent's first
// thread, frees all descriptors and finally calls Process::kill(). Reaching
// the end of the function is treated as fatal.
399 void PosixSubsystem::exit(int code)
400 {
401  Thread *pThread = Processor::information().getCurrentThread();
402 
403  Process *pProcess = pThread->getParent();
404  NOTICE(
405  "PosixSubsystem::exit(" << Dec << pProcess->getId() << ", code=" << code
406  << ")");
407  pProcess->markTerminating();
408 
409  if (pProcess->getExitStatus() == 0 || // Normal exit.
410  pProcess->getExitStatus() == 0x7F || // Suspended.
411  pProcess->getExitStatus() == 0xFF) // Continued.
412  pProcess->setExitStatus((code & 0xFF) << 8);
// Any non-zero code is reported to the thread as an unexpected exit.
413  if (code)
414  {
415  pThread->unexpectedExit();
416  }
417 
418  // Exit called, but we could be at any nesting level in the event stack.
419  // We have to propagate this exit() to all lower stack levels because they
420  // may have semaphores and stuff open.
421 
422  // So, if we're not dealing with the lowest in the stack...
// NOTE(review): listing lines 423-427 are absent from this copy.
428  if (pThread->getStateLevel() > 1)
429  {
430  // OK, we have other events running. They'll have to die first before we
431  // can do anything.
432  pThread->setUnwindState(Thread::Exit);
433 
434  Thread *pBlockingThread =
435  pThread->getBlockingThread(pThread->getStateLevel() - 1);
436  while (pBlockingThread)
437  {
// NOTE(review): listing line 438 (the first statement of this loop body) is
// absent from this copy.
439  pBlockingThread = pBlockingThread->getBlockingThread();
440  }
441 
442  Processor::information().getScheduler().eventHandlerReturned();
443  }
// NOTE(review): listing line 444 is absent from this copy.
445 
446  // We're the lowest in the stack, so we can proceed with the exit function.
447 
448  delete pProcess->getLinker();
449 
// NOTE(review): listing line 450 is absent from this copy.
451 
452  // If it's a POSIX process, remove group membership
453  if (pProcess->getType() == Process::Posix)
454  {
455  PosixProcess *p = static_cast<PosixProcess *>(pProcess);
456  ProcessGroup *pGroup = p->getProcessGroup();
457  if (pGroup)
458  {
459  if (p->getGroupMembership() == PosixProcess::Member)
460  {
// NOTE(review): listing line 461 (the start of this for statement) is absent
// from this copy.
462  pGroup->Members.begin();
463  it != pGroup->Members.end(); it++)
464  {
465  if ((*it) == p)
466  {
467  it = pGroup->Members.erase(it);
468  break;
469  }
470  }
471  }
472  else if (p->getGroupMembership() == PosixProcess::Leader)
473  {
474  // Group loses a leader, this is fine
475  pGroup->Leader = nullptr;
476  }
477 
// Only the local pointer is cleared after the delete; nothing visible here
// resets the pointer returned by p->getProcessGroup().
478  if (!pGroup->Members.size())
479  {
480  // Destroy the group, we were the last process in it.
481  delete pGroup;
482  pGroup = 0;
483  }
484  }
485  }
486 
487  // Notify parent that we terminated (we may be in a separate process group).
488  Process *pParent = pProcess->getParent();
489  if (pParent && pParent->getSubsystem())
490  {
491  pParent->getSubsystem()->threadException(pParent->getThread(0), Child);
492  }
493 
494  // Clean up the descriptor table
495  freeMultipleFds();
496 
497  // Tell some interesting info
498  NOTICE("at exit for pid " << Dec << pProcess->getId() << "...");
499  NOTICE(
500  " -> file lookup LRU cache had "
501  << m_FindFileCache.hits() << " hits and " << m_FindFileCache.misses()
502  << " misses");
503 
504  pProcess->kill();
505 
506  // Should NEVER get here.
507  FATAL("PosixSubsystem::exit() running after Process::kill()!");
508 }
509 
// Delivers a termination-style signal event to a thread of a POSIX process.
//
//  killReason - selects the signal number looked up: Interrupted -> 2,
//               Terminated -> 15, anything else -> 9.
//  pThread    - target thread; when null, the current thread is used.
//
// Returns false only when the thread's process is not a POSIX process. It
// returns true otherwise, including when no handler or event exists for the
// chosen signal and nothing was sent.
510 bool PosixSubsystem::kill(KillReason killReason, Thread *pThread)
511 {
512  if (!pThread)
513  pThread = Processor::information().getCurrentThread();
514  Process *pProcess = pThread->getParent();
515  if (pProcess->getType() != Process::Posix)
516  {
517  ERROR("PosixSubsystem::kill called with a non-POSIX process!");
518  return false;
519  }
520  PosixSubsystem *pSubsystem =
521  static_cast<PosixSubsystem *>(pProcess->getSubsystem());
522 
// NOTE(review): despite the comment below, signal 9 is only used for the
// default case. The literals 2/15/9 are presumably SIGINT/SIGTERM/SIGKILL;
// <signal.h> is included by this file, so the named constants could be used -
// confirm the values match.
523  // Send SIGKILL. getSignalHandler handles all that locking shiz for us.
524  SignalHandler *sig = 0;
525  switch (killReason)
526  {
527  case Interrupted:
528  sig = pSubsystem->getSignalHandler(2);
529  break;
530 
531  case Terminated:
532  sig = pSubsystem->getSignalHandler(15);
533  break;
534 
535  default:
536  sig = pSubsystem->getSignalHandler(9);
537  break;
538  }
539 
540  if (sig && sig->pEvent)
541  {
542  PS_NOTICE("PosixSubsystem - killing " << pThread->getParent()->getId());
543 
544  // Send the kill event
// NOTE(review): listing line 545 is absent from this copy.
546  pThread->sendEvent(sig->pEvent);
547 
548  // Allow the event to run
// NOTE(review): listing lines 549-550 (the statement this comment describes)
// are absent from this copy.
551  }
552 
553  return true;
554 }
555 
// NOTE(review): the signature line (listing line 556) is absent from this
// copy; the log text below names the function threadException and the body
// reads `pThread` and `eType`.
//
// Translates an exception type into a POSIX signal number and forwards it to
// sendSignal() for `pThread`.
557 {
558  PS_NOTICE(
559  "PosixSubsystem::threadException -> "
560  << Dec << pThread->getParent()->getId() << ":" << pThread->getId());
561 
562  // What was the exception?
563  int signal = -1;
564  switch (eType)
565  {
566  case PageFault:
567  PS_NOTICE(" (Page fault)");
568  // Send SIGSEGV
569  signal = SIGSEGV;
570  break;
571  case InvalidOpcode:
572  PS_NOTICE(" (Invalid opcode)");
573  // Send SIGILL
574  signal = SIGILL;
575  break;
576  case GeneralProtectionFault:
577  PS_NOTICE(" (General Fault)");
578  // Send SIGBUS
579  signal = SIGBUS;
580  break;
581  case DivideByZero:
582  PS_NOTICE(" (Division by zero)");
583  // Send SIGFPE
584  signal = SIGFPE;
585  break;
586  case FpuError:
587  PS_NOTICE(" (FPU error)");
588  // Send SIGFPE
589  signal = SIGFPE;
590  break;
591  case SpecialFpuError:
592  PS_NOTICE(" (FPU error - special)");
593  // Send SIGFPE
594  signal = SIGFPE;
595  break;
596  case TerminalInput:
597  PS_NOTICE(" (Attempt to read from terminal by non-foreground "
598  "process)");
599  // Send SIGTTIN
600  signal = SIGTTIN;
601  break;
602  case TerminalOutput:
603  PS_NOTICE(" (Output to terminal by non-foreground process)");
604  // Send SIGTTOU
605  signal = SIGTTOU;
606  break;
607  case Continue:
608  PS_NOTICE(" (Continuing a stopped process)");
609  // Send SIGCONT
610  signal = SIGCONT;
611  break;
612  case Stop:
613  PS_NOTICE(" (Stopping a process)");
614  // Send SIGTSTP
615  signal = SIGTSTP;
616  break;
617  case Interrupt:
618  PS_NOTICE(" (Interrupting a process)");
619  // Send SIGINT
620  signal = SIGINT;
621  break;
622  case Quit:
623  PS_NOTICE(" (Requesting quit)");
624  // Send SIGTERM
625  signal = SIGTERM;
626  break;
627  case Child:
628  PS_NOTICE(" (Child status changed)");
629  // Send SIGCHLD
630  signal = SIGCHLD;
631  break;
632  case Pipe:
633  PS_NOTICE(" (Pipe broken)");
634  // Send SIGPIPE
635  signal = SIGPIPE;
636  break;
637  default:
638  PS_NOTICE(" (Unknown)");
639  // Unknown exception
640  ERROR(
641  "Unknown exception type in threadException - POSIX subsystem");
642  break;
643  }
644 
// NOTE(review): for an unknown exception type `signal` is still -1 here and is
// passed on to sendSignal() regardless - confirm that a lookup of -1 is
// harmless, or return early in the default case.
645  sendSignal(pThread, signal);
646 }
647 
// Queues the event configured for `signal` on `pThread`.
//
//  pThread - target thread; its parent must be a POSIX process, otherwise an
//            error is logged and nothing is sent.
//  signal  - signal number used to look up the handler in the target
//            process's subsystem.
//  yield   - when true and the target is the current thread, the scheduler is
//            asked to check event state right away; the other-thread case is
//            partly absent from this copy.
//
// If the thread already has the handler's event pending, the new signal is
// dropped with a warning. If no handler or event is configured, a notice is
// logged and nothing is sent.
648 void PosixSubsystem::sendSignal(Thread *pThread, int signal, bool yield)
649 {
650  PS_NOTICE(
651  "PosixSubsystem::sendSignal #" << signal << " -> pid:tid " << Dec
652  << pThread->getParent()->getId() << ":"
653  << pThread->getId());
654 
655  Process *pProcess = pThread->getParent();
656  if (pProcess->getType() != Process::Posix)
657  {
// NOTE(review): the message names threadException although this is
// sendSignal; looks copy-pasted - confirm before changing the log text.
658  ERROR(
659  "PosixSubsystem::threadException called with a non-POSIX process!");
660  return;
661  }
662  PosixSubsystem *pSubsystem =
663  static_cast<PosixSubsystem *>(pProcess->getSubsystem());
664 
665  // What was the exception?
666  SignalHandler *sig = pSubsystem->getSignalHandler(signal);
667  if (!sig)
668  {
// Logged only; execution continues into the else branch at the bottom.
669  ERROR("Unknown signal in sendSignal - POSIX subsystem");
670  }
671 
672  // If we're good to go, send the signal.
673  if (sig && sig->pEvent)
674  {
675  // Is this process already pending a delivery of the given signal?
676  if (pThread->hasEvent(sig->pEvent))
677  {
678  // yep! we need to drop this generated signal instead of sending it
679  // again to the target thread
680  WARNING("PosixSubsystem::sendSignal dropping signal as a previous "
681  "generation has not delivered yet.");
682  }
683  else
684  {
685  pThread->sendEvent(sig->pEvent);
686 
687  if (yield)
688  {
689  Thread *pCurrentThread =
690  Processor::information().getCurrentThread();
691  if (pCurrentThread == pThread)
692  {
693  // Attempt to execute the new event immediately.
694  Processor::information().getScheduler().checkEventState(0);
695  }
696  else
697  {
698  // Yield so the event can fire.
// NOTE(review): listing line 699 (the statement this comment describes) is
// absent from this copy.
700  }
701  }
702  }
703  }
704  else
705  {
706  // PS_NOTICE("No event configured for signal #" << signal << ", silently
707  // dropping!");
708  NOTICE(
709  "No event configured for signal #" << signal
710  << ", silently dropping!");
711  }
712 }
713 
// NOTE(review): the signature line (listing line 714) is absent from this
// copy; the body reads `sig` and `handler`, so presumably this installs a
// signal handler - confirm the name and parameter types.
//
// Installs `handler` for signal `sig` (reduced modulo 32), replacing and then
// deleting any handler previously registered for that slot. When `handler` is
// null nothing is inserted or removed.
715 {
716  while (!m_SignalHandlersLock.acquire())
717  ;
718 
719  SignalHandler *removal = nullptr;
720 
// Signal numbers wrap into the range 0..31.
721  sig %= 32;
722  if (handler)
723  {
724  removal = m_SignalHandlers.lookup(sig);
725  if (removal)
726  {
727  // Remove from the list
728  m_SignalHandlers.remove(sig);
729  }
730 
731  // Insert into the signal handler table
732  handler->sig = sig;
733 
734  m_SignalHandlers.insert(sig, handler);
735  }
736 
// NOTE(review): listing line 737 is absent here; no release of
// m_SignalHandlersLock is visible - confirm the missing line does that, as the
// comment below says the delete happens with no lock held.
738 
739  // Complete the destruction of the handler (waiting for deletion) with no
740  // lock held.
741  if (removal)
742  {
743  delete removal;
744  }
745 }
746 
// NOTE(review): listing lines 747-753 (whatever preceded this body, including
// the signature) are absent from this copy; the body returns a descriptor
// number, so presumably this allocates the next free descriptor - confirm.
//
// Returns a descriptor number that was clear in m_FdBitmap and is now set:
// either the first clear bit in [m_LastFd, m_NextFd), or m_NextFd itself
// (which is then incremented).
754 {
755  Uninterruptible throughout;
756 
757  // Enter critical section for writing.
758  while (!m_FdLock.acquire())
759  ;
760 
761  // Try to recycle if possible
// The scan starts at m_LastFd, so clear bits below it are not considered.
762  for (size_t i = m_LastFd; i < m_NextFd; i++)
763  {
764  if (!(m_FdBitmap.test(i)))
765  {
766  m_LastFd = i;
767  m_FdBitmap.set(i);
768  m_FdLock.release();
769  return i;
770  }
771  }
772 
773  // Otherwise, allocate
774  // m_NextFd will always contain the highest allocated fd
775  m_FdBitmap.set(m_NextFd);
776  size_t ret = m_NextFd++;
777  m_FdLock.release();
778  return ret;
779 }
780 
781 void PosixSubsystem::allocateFd(size_t fdNum)
782 {
783  Uninterruptible throughout;
784 
785  // Enter critical section for writing.
786  while (!m_FdLock.acquire())
787  ;
788 
789  if (fdNum >= m_NextFd)
790  m_NextFd = fdNum + 1;
791  m_FdBitmap.set(fdNum);
792 
793  m_FdLock.release();
794 }
795 
796 void PosixSubsystem::freeFd(size_t fdNum)
797 {
798  Uninterruptible throughout;
799 
800  // Enter critical section for writing.
801  while (!m_FdLock.acquire())
802  ;
803 
804  m_FdBitmap.clear(fdNum);
805 
806  FileDescriptor *pFd = m_FdMap.lookup(fdNum);
807  if (pFd)
808  {
809  m_FdMap.remove(fdNum);
810  delete pFd;
811  }
812 
813  if (fdNum < m_LastFd)
814  m_LastFd = fdNum;
815 
816  m_FdLock.release();
817 }
818 
// NOTE(review): the signature line (listing line 819) is absent from this
// copy; the body reads a `pSubsystem` pointer and returns a bool - presumably
// this copies descriptors from another PosixSubsystem - confirm.
//
// Frees every descriptor this subsystem currently holds, then inserts a copy
// of each non-null FileDescriptor from pSubsystem->m_FdMap under the same
// descriptor number. Always returns true in the visible code.
820 {
821  Uninterruptible throughout;
822 
823  assert(pSubsystem);
824 
825  // We're totally resetting our local state, ensure there's no files hanging
826  // around.
827  freeMultipleFds();
828 
829  // Totally changing everything... Don't allow other functions to meddle.
830  while (!m_FdLock.acquire())
831  ;
832  while (!pSubsystem->m_FdLock.acquire())
833  ;
834 
835  // Copy each descriptor across from the original subsystem
836  FdMap &map = pSubsystem->m_FdMap;
837  for (FdMap::Iterator it = map.begin(); it != map.end(); it++)
838  {
839  FileDescriptor *pFd = it.value();
840  if (!pFd)
841  continue;
842  size_t newFd = it.key();
843 
844  FileDescriptor *pNewFd = new FileDescriptor(*pFd);
845 
846  // Perform the same action as addFileDescriptor. We need to duplicate
847  // here because we currently hold the FD lock, which will deadlock if we
848  // call any function which attempts to acquire it.
849  if (newFd >= m_NextFd)
850  m_NextFd = newFd + 1;
851  m_FdBitmap.set(newFd);
852  m_FdMap.insert(newFd, pNewFd);
853  }
854 
// Locks are released in the reverse of the order they were taken.
855  pSubsystem->m_FdLock.release();
856  m_FdLock.release();
857  return true;
858 }
859 
// NOTE(review): the first line of this definition's signature (listing line
// 860) is absent from this copy; presumably this is the freeMultipleFds()
// called elsewhere in this file - confirm, including the default arguments
// implied by the zero-argument calls.
//
// Deletes every FileDescriptor whose number lies in [iFirst, iLast] (both ends
// inclusive) and, when bOnlyCloExec is set, only those with FD_CLOEXEC in
// fdflags. If the request is "everything" (iFirst == 0, iLast == ~0UL and not
// close-on-exec only) the whole map is cleared in one go; otherwise the
// matching keys are removed individually after the scan.
861  bool bOnlyCloExec, size_t iFirst, size_t iLast)
862 {
863  Uninterruptible throughout;
864 
865  assert(iFirst < iLast);
866 
867  while (!m_FdLock.acquire())
868  ; // Don't allow any access to the FD data
869 
870  // Because removing FDs as we go from the Tree can actually leave the Tree
871  // iterators in a dud state, we'll add all the FDs to remove to this list.
872  List<void *> fdsToRemove;
873 
874  // Are all FDs to be freed? Or only a selection?
875  bool bAllToBeFreed = ((iFirst == 0 && iLast == ~0UL) && !bOnlyCloExec);
876  if (bAllToBeFreed)
877  m_LastFd = 0;
878 
879  FdMap &map = m_FdMap;
880  for (FdMap::Iterator it = map.begin(); it != map.end(); it++)
881  {
882  size_t Fd = it.key();
883  FileDescriptor *pFd = it.value();
884  if (!pFd)
885  continue;
886 
887  if (!(Fd >= iFirst && Fd <= iLast))
888  continue;
889 
890  if (bOnlyCloExec)
891  {
892  if (!(pFd->fdflags & FD_CLOEXEC))
893  continue;
894  }
895 
896  // Perform the same action as freeFd. We need to duplicate code here
897  // because we currently hold the FD lock, which will deadlock if we call
898  // any function which attempts to acquire it.
899 
900  // No longer usable
901  m_FdBitmap.clear(Fd);
902 
903  // Add to the list of FDs to remove, iff we won't be cleaning up the
904  // entire set
// Descriptor numbers are carried through the list as void* values.
905  if (!bAllToBeFreed)
906  fdsToRemove.pushBack(reinterpret_cast<void *>(Fd));
907 
908  // Delete the descriptor itself
909  delete pFd;
910 
911  // And reset the "last freed" tracking variable, if this is lower than
912  // it already.
913  if (Fd < m_LastFd)
914  m_LastFd = Fd;
915  }
916 
917  // Clearing all AND not caring about CLOEXEC FDs? If so, clear the map.
918  // Otherwise, only clear the FDs that are supposed to be cleared.
919  if (bAllToBeFreed)
920  m_FdMap.clear();
921  else
922  {
923  for (List<void *>::Iterator it = fdsToRemove.begin();
924  it != fdsToRemove.end(); it++)
925  m_FdMap.remove(reinterpret_cast<size_t>(*it));
926  }
927 
928  m_FdLock.release();
929 }
930 
// NOTE(review): the signature line (listing line 931) is absent from this
// copy; the body reads an `fd` parameter and returns a FileDescriptor pointer.
//
// Looks up `fd` in m_FdMap under the read side of m_FdLock and returns
// whatever lookup() yields. The lock is left before returning, so the pointer
// is not protected by it afterwards.
932 {
933  Uninterruptible throughout;
934 
935  // Enter the critical section, for reading.
936  while (!m_FdLock.enter())
937  ;
938 
939  FileDescriptor *pFd = m_FdMap.lookup(fd);
940 
941  m_FdLock.leave();
942 
943  return pFd;
944 }
945 
// NOTE(review): the signature line (listing line 946) is absent from this
// copy; the body reads `fd` and `pFd`, and another comment in this file refers
// to a function named addFileDescriptor - presumably this one; confirm.
//
// Registers `pFd` under descriptor number `fd`: any existing descriptor at
// that number is freed first, the number is re-marked as allocated, and the
// new object is inserted into m_FdMap under the write lock.
947 {
// NOTE(review): listing line 948 is absent from this copy.
// freeFd() and allocateFd() each take and release m_FdLock themselves, so the
// three steps are not performed under one continuous hold of the lock.
949  freeFd(fd);
950  allocateFd(fd);
951 
952  {
953  Uninterruptible throughout;
954 
955  // Enter critical section for writing.
956  while (!m_FdLock.acquire())
957  ;
958 
959  m_FdMap.insert(fd, pFd);
960 
961  m_FdLock.release();
962  }
963 }
964 
// NOTE(review): the signature line (listing line 965) and the start of the for
// statement (listing line 967) are absent from this copy; the body reads a
// `pThread` parameter - presumably a notification that a thread has gone away.
//
// Finds the first PosixThread in m_Threads whose pThread matches and releases
// its isRunning lock. The PosixThread object itself is left in place.
966 {
968  it != m_Threads.end(); it++)
969  {
970  PosixThread *thread = it.value();
971  if (thread->pThread != pThread)
972  continue;
973 
974  // Can safely assert that this thread is no longer running.
975  // We do not however kill the thread object yet. It can be cleaned up
976  // when the PosixSubsystem quits (if this was the last thread). Or, it
977  // will be cleaned up by a join().
978  thread->isRunning.release();
979  break;
980  }
981 }
982 
983 bool PosixSubsystem::checkAccess(
984  FileDescriptor *pFileDescriptor, bool bRead, bool bWrite,
985  bool bExecute) const
986 {
987  return VFS::checkAccess(pFileDescriptor->file, bRead, bWrite, bExecute);
988 }
989 
// NOTE(review): the first line of this definition's signature (listing line
// 990) is absent from this copy; the log text below names the function
// loadElf.
//
// Maps the PT_LOAD segments of an ELF image whose file contents are already
// readable at `mappedAddress`.
//
//  pFile         - file backing the image; passed to the mapping call.
//  mappedAddress - address at which the ELF header and program headers are
//                  read.
//  newAddress    - out: the (unaligned) start address of the loaded image.
//  finalAddress  - out: page-aligned end address of the loaded image.
//  relocated     - out: set to true when the image was placed at an allocated
//                  address (ET_REL / ET_DYN). NOTE(review): never written in
//                  the other branch, so the caller's initial value survives.
//
// Returns false if the ELF magic does not match, if address space cannot be
// reserved, or if a mapping fails; true otherwise.
991  File *pFile, uintptr_t mappedAddress, uintptr_t &newAddress,
992  uintptr_t &finalAddress, bool &relocated)
993 {
994  PS_NOTICE("PosixSubsystem::loadElf(" << pFile->getName() << ")");
995 
996  Process *pProcess =
997  Processor::information().getCurrentThread()->getParent();
998 
999  // Grab the file header to check magic and find program headers.
1000  Elf::ElfHeader_t *pHeader =
1001  reinterpret_cast<Elf::ElfHeader_t *>(mappedAddress);
1002  if ((pHeader->ident[1] != 'E') || (pHeader->ident[2] != 'L') ||
1003  (pHeader->ident[3] != 'F') || (pHeader->ident[0] != 127))
1004  {
1005  return false;
1006  }
1007 
// NOTE(review): phnum and phoff are taken from the file as-is; nothing visible
// here checks that the program header table lies inside the mapped file.
1008  size_t phnum = pHeader->phnum;
1009  Elf::ElfProgramHeader_t *phdrs =
1010  reinterpret_cast<Elf::ElfProgramHeader_t *>(
1011  mappedAddress + pHeader->phoff);
1012 
1013  // Find full memory size that we need to map in.
// NOTE(review): ~0U is an unsigned int, so on a target where uintptr_t is
// wider than unsigned int this starts at 0xFFFFFFFF rather than the maximum
// uintptr_t; a segment whose vaddr is above that would never lower
// startAddress - confirm and consider ~static_cast<uintptr_t>(0).
1014  uintptr_t startAddress = ~0U;
1015  uintptr_t unalignedStartAddress = 0;
1016  uintptr_t endAddress = 0;
1017  for (size_t i = 0; i < phnum; ++i)
1018  {
1019  if (phdrs[i].type != PT_LOAD)
1020  {
1021  continue;
1022  }
1023 
1024  if (phdrs[i].vaddr < startAddress)
1025  {
1026  startAddress = phdrs[i].vaddr;
1027  }
1028 
1029  uintptr_t maybeEndAddress = phdrs[i].vaddr + phdrs[i].memsz;
1030  if (maybeEndAddress > endAddress)
1031  {
1032  endAddress = maybeEndAddress;
1033  }
1034  }
1035 
1036  // Align to page boundaries.
// Start is rounded down, end is rounded up to the next page boundary.
1037  size_t pageSz = PhysicalMemoryManager::getPageSize();
1038  unalignedStartAddress = startAddress;
1039  startAddress &= ~(pageSz - 1);
1040  if (endAddress & (pageSz - 1))
1041  {
1042  endAddress = (endAddress + pageSz) & ~(pageSz - 1);
1043  }
1044 
1045  // OK, we can allocate space for the file now.
1046  bool bRelocated = false;
1047  if (pHeader->type == ET_REL || pHeader->type == ET_DYN)
1048  {
// Try the dynamic space allocator first, then the general one.
1049  if (!pProcess->getDynamicSpaceAllocator().allocate(
1050  endAddress - startAddress, newAddress))
1051  if (!pProcess->getSpaceAllocator().allocate(
1052  endAddress - startAddress, newAddress))
1053  return false;
1054 
1055  bRelocated = true;
// NOTE(review): startAddress was page-aligned above, so
// (startAddress & (pageSz - 1)) is zero here and unalignedStartAddress equals
// newAddress - confirm whether the pre-alignment value was intended.
1056  unalignedStartAddress = newAddress + (startAddress & (pageSz - 1));
1057  startAddress = newAddress;
1058 
1059  newAddress = unalignedStartAddress;
1060 
1061  relocated = true;
1062  }
1063  else
1064  {
// Fixed-address image: reserve exactly the range the file asks for.
1065  if (!pProcess->getDynamicSpaceAllocator().allocateSpecific(
1066  startAddress, endAddress - startAddress))
1067  if (!pProcess->getSpaceAllocator().allocateSpecific(
1068  startAddress, endAddress - startAddress))
1069  return false;
1070 
1071  newAddress = unalignedStartAddress;
1072  }
1073 
1074  finalAddress = startAddress + (endAddress - startAddress);
1075 
1076  // Can now do another pass, mapping in as needed.
1077  for (size_t i = 0; i < phnum; ++i)
1078  {
1079  if (phdrs[i].type != PT_LOAD)
1080  {
1081  continue;
1082  }
1083 
1084  uintptr_t base = phdrs[i].vaddr;
1085  if (bRelocated)
1086  {
1087  base += startAddress;
1088  }
1089  uintptr_t unalignedBase = base;
1090  if (base & (pageSz - 1))
1091  {
1092  base &= ~(pageSz - 1);
1093  }
1094 
1095  uintptr_t offset = phdrs[i].offset;
1096  if (offset & (pageSz - 1))
1097  {
1098  offset &= ~(pageSz - 1);
1099  }
1100 
1101  // if we don't add the unaligned part to the length, we can map only
1102  // enough to cover the aligned page even though the alignment may lead
1103  // to the region covering two pages...
1104  size_t length = phdrs[i].memsz + (unalignedBase & (pageSz - 1));
1105  if (length & (pageSz - 1))
1106  {
1107  length = (length + pageSz) & ~(pageSz - 1);
1108  }
1109 
1110  // Map.
// perms starts out with Read, so the PF_R branch below adds nothing new.
1111  MemoryMappedObject::Permissions perms = MemoryMappedObject::Read;
1112  if (phdrs[i].flags & PF_X)
1113  {
1114  perms |= MemoryMappedObject::Exec;
1115  }
1116  if (phdrs[i].flags & PF_R)
1117  {
1118  perms |= MemoryMappedObject::Read;
1119  }
1120  if (phdrs[i].flags & PF_W)
1121  {
1122  perms |= MemoryMappedObject::Write;
1123  }
1124 
1125  PS_NOTICE(
1126  pFile->getName() << " PHDR[" << i << "]: @" << Hex << base << " -> "
1127  << base + length);
// NOTE(review): listing line 1128 (the start of the statement that declares
// `pObject` and makes the mapping call) is absent from this copy.
1129  pFile, base, length, perms, offset);
1130  if (!pObject)
1131  {
1132  ERROR("PosixSubsystem::loadElf: failed to map PT_LOAD section");
1133  return false;
1134  }
1135 
// When the segment occupies more memory than it has file bytes, the tail
// [filesz, memsz) is zeroed: the remainder of the partial page by a direct
// ByteSet, and any whole pages after it by a separate mapping.
1136  if (phdrs[i].memsz > phdrs[i].filesz)
1137  {
1138  uintptr_t end = unalignedBase + phdrs[i].memsz;
1139  uintptr_t zeroStart = unalignedBase + phdrs[i].filesz;
1140  if (zeroStart & (pageSz - 1))
1141  {
1142  size_t numBytes = pageSz - (zeroStart & (pageSz - 1));
1143  if ((zeroStart + numBytes) > end)
1144  {
1145  numBytes = end - zeroStart;
1146  }
// NOTE(review): this writes through the mapping created above regardless of
// whether PF_W was requested for the segment - confirm the mapping tolerates
// that.
1147  ByteSet(reinterpret_cast<void *>(zeroStart), 0, numBytes);
1148  zeroStart += numBytes;
1149  }
1150 
1151  if (zeroStart < end)
1152  {
1153  MemoryMappedObject *pAnonymousRegion =
// NOTE(review): listing line 1154 (the call whose arguments follow) is absent
// from this copy.
1155  zeroStart, end - zeroStart, perms);
1156  if (!pAnonymousRegion)
1157  {
1158  ERROR("PosixSubsystem::loadElf: failed to map anonymous "
1159  "pages for filesz/memsz mismatch");
1160  return false;
1161  }
1162  }
1163  }
1164  }
1165 
1166  return true;
1167 }
1168 
// Resolves `path` to a File.
//
//  path       - path to look up. NOTE(review): path[0] is read without an
//               emptiness check - confirm String tolerates that for "".
//  workingDir - directory that relative lookups start from; when null, the
//               owning process's current working directory is used.
//
// For every ABI other than LinuxAbi, and for any path not starting with '/',
// the lookup is done directly against `workingDir`. Otherwise the path is
// looked up in m_FindFileCache and, on a miss, against the root of m_pRootFs.
// Returns the File found, or nullptr.
1169 File *PosixSubsystem::findFile(const String &path, File *workingDir)
1170 {
1171  if (workingDir == nullptr)
1172  {
1173  assert(m_pProcess);
1174  workingDir = m_pProcess->getCwd();
1175  }
1176 
1177  bool mountAwareAbi = getAbi() != PosixSubsystem::LinuxAbi;
1178 
1179  // for non-mount-aware ABIs, we need to fall back if the path is absolute
1180  // this means we can be on dev»/ and still run things like /bin/ls because
1181  // the lookup for dev»/bin/ls fails and falls back to root»/bin/ls
1182  if (mountAwareAbi || (path[0] != '/'))
1183  {
1184  // no fall back for mount-aware ABIs (e.g. Pedigree's ABI)
1185  // or it's a non-absolute path on a non-mount-aware ABI, and therefore
1186  // needs to be based on the working directory - not a different FS
1187  return VFS::instance().find(path, workingDir);
1188  }
1189 
// NOTE(review): from here on `workingDir` is not consulted at all, although
// the comment above describes the root lookup as a fallback - confirm.
1190  File *target = nullptr;
1191  if (!m_FindFileCache.get(path, target))
1192  {
1193  // fall back to root filesystem
1194  if (!m_pRootFs)
1195  {
// NOTE(review): listing line 1196 (the statement of this branch, presumably
// the one that fills in m_pRootFs) is absent from this copy.
1197  }
1198 
1199  if (m_pRootFs)
1200  {
1201  target = VFS::instance().find(path, m_pRootFs->getRoot());
1202  }
1203  }
1204 
// Runs for cache hits as well as fresh lookups, so a hit is stored again.
1205  if (target)
1206  {
1207  m_FindFileCache.store(path, target);
1208  }
1209 
1210  return target;
1211 }
1212 
// Helpers for building data on a downward-growing stack through a pointer
// `stack`, which every macro updates in place (it is moved down before the
// data is written).
//
// The multi-statement helpers are wrapped in do { } while (0) so that they
// behave as a single statement (e.g. under an unbraced `if`), and they
// evaluate `length` exactly once, *before* `stack` is moved. The latter
// matters for STACK_ALIGN, whose length expression depends on `stack`:
// evaluating it again after the adjustment would yield zero and leave the
// padding bytes unzeroed.

// Pre-decrements `stack` and stores `value` in the slot it then points at.
#define STACK_PUSH(stack, value) (*--(stack) = (value))

// Pushes `value1` and then `value2` (so `value2` ends up at the lower address).
#define STACK_PUSH2(stack, value1, value2) \
    do                                     \
    {                                      \
        STACK_PUSH(stack, value1);         \
        STACK_PUSH(stack, value2);         \
    } while (0)

// Moves `stack` down by `length` bytes and copies `length` bytes from `value`
// to the new location.
#define STACK_PUSH_COPY(stack, value, length)                  \
    do                                                         \
    {                                                          \
        const auto stackPushLen_ = (length);                   \
        (stack) = adjust_pointer((stack), -(stackPushLen_));   \
        MemoryCopy((stack), (value), stackPushLen_);           \
    } while (0)

// Moves `stack` down by `length` bytes and copies the string `str` there via
// StringCopyN with `length` as the limit.
#define STACK_PUSH_STRING(stack, str, length)                              \
    do                                                                     \
    {                                                                      \
        const auto stackPushLen_ = (length);                               \
        (stack) = adjust_pointer((stack), -(stackPushLen_));               \
        StringCopyN(reinterpret_cast<char *>(stack), (str), stackPushLen_); \
    } while (0)

// Moves `stack` down by `length` bytes and fills those bytes with zero.
#define STACK_PUSH_ZEROES(stack, length)                     \
    do                                                       \
    {                                                        \
        const auto stackPushLen_ = (length);                 \
        (stack) = adjust_pointer((stack), -(stackPushLen_)); \
        ByteSet((stack), 0, stackPushLen_);                  \
    } while (0)

// Rounds `stack` down to a multiple of `to` (which must be a power of two),
// zero-filling the bytes skipped over. The padding size is simply the low
// bits of the current address; this equals the original
// `(to) - ((to) - (addr & ((to) - 1)))` expression.
#define STACK_ALIGN(stack, to) \
    STACK_PUSH_ZEROES(stack, (reinterpret_cast<uintptr_t>(stack) & ((to) -1)))
1230 
// NOTE(review): the first line of this definition's signature (listing line
// 1231) is absent from this copy; presumably an invoke() overload - confirm.
//
// Forwards to the four-argument invoke() with 0 as the last argument.
1232  const char *name, Vector<String> &argv, Vector<String> &env)
1233 {
1234  return invoke(name, argv, env, 0);
1235 }
1236 
// NOTE(review): the first line of this definition's signature (listing line
// 1237) is absent from this copy; presumably an invoke() overload - confirm.
//
// Forwards to the four-argument invoke(), passing the address of `state`.
1238  const char *name, Vector<String> &argv, Vector<String> &env,
1239  SyscallState &state)
1240 {
1241  return invoke(name, argv, env, &state);
1242 }
1243 
// NOTE(review): the first line of this definition's signature (listing line
// 1244) is absent from this copy; the log text below suggests it parses a
// shebang line - confirm the name and return type (the body returns bools).
//
// Reads the first line of `pFile` and, if it starts with "#!", resolves the
// interpreter it names.
//
//  pFile    - file to inspect.
//  pOutFile - out: set to the interpreter's File only when a shebang line was
//             found and its target resolved; otherwise left untouched.
//  argv     - in/out: the space-separated tokens of the shebang line are
//             inserted at the front, in their original order.
//
// Returns true when there is no shebang (or it has no tokens) and when the
// interpreter was resolved; returns false, after SYSCALL_ERROR(DoesNotExist),
// when the interpreter cannot be found.
1245  File *pFile, File *&pOutFile, Vector<String> &argv)
1246 {
1247  PS_NOTICE("Attempting to parse shebang in " << pFile->getFullPath());
1248 
1249  // Try and read the shebang, if any.
// NOTE(review): listing line 1250 is absent from this copy.
1251  String fileContents;
1252  bool bSearchDone = false;
1253  size_t offset = 0;
1254  while (!bSearchDone)
1255  {
// 128 bytes are requested per read; the extra byte holds the terminator.
1256  char buff[129];
1257  size_t nRead =
1258  pFile->read(offset, 128, reinterpret_cast<uintptr_t>(buff));
1259  buff[nRead] = 0;
1260  offset += nRead;
1261 
1262  if (nRead)
1263  {
1264  // Truncate at the newline if one is found (and then stop
1265  // iterating).
1266  char *newline = const_cast<char *>(StringFind(buff, '\n'));
1267  if (newline)
1268  {
1269  bSearchDone = true;
1270  *newline = 0;
1271  }
1272  fileContents += String(buff);
1273  }
1274 
// A short read ends the search whether or not a newline was seen.
1275  if (nRead < 128)
1276  {
1277  bSearchDone = true;
1278  break;
1279  }
1280  }
1281 
1282  // Is this even a shebang line?
1283  if (!fileContents.startswith("#!"))
1284  {
1285  PS_NOTICE("no shebang found");
1286  return true;
1287  }
1288 
1289  // Strip the shebang.
// Called twice: once per character of "#!" (presumably lchomp() removes one
// leading character - verify).
1290  fileContents.lchomp();
1291  fileContents.lchomp();
1292 
1293  // OK, we have a shebang line. We need to tokenize.
1294  Vector<String> additionalArgv = fileContents.tokenise(' ');
1295  if (!additionalArgv.count())
1296  {
1297  // Not a true shebang line.
1298  PS_NOTICE("split didn't find anything");
1299  return true;
1300  }
1301 
1302  // Normalise path to ensure we have the correct path to invoke.
1303  String invokePath;
1304  String newTarget = *additionalArgv.begin();
1305  if (normalisePath(invokePath, static_cast<const char *>(newTarget)))
1306  {
1307  // rewrote, update argv[0] accordingly.
// NOTE(review): only the local copy `newTarget` is updated; the first element
// of additionalArgv (which is what gets pushed into argv below) keeps the
// original text - confirm against the comment above.
1308  newTarget = invokePath;
1309  }
1310 
1311  // Can we load the new program?
1312  File *pNewTarget = findFileWithAbiFallbacks(newTarget);
1313  if (!pNewTarget)
1314  {
1315  // No, we cannot.
1316  PS_NOTICE("target not found");
1317  SYSCALL_ERROR(DoesNotExist);
1318  return false;
1319  }
1320 
1321  // OK, we can now insert to argv - we do so backwards so it's just a simple
1322  // pushFront.
1323  while (additionalArgv.count())
1324  {
1325  argv.pushFront(additionalArgv.popBack());
1326  }
1327 
1328  pOutFile = pNewTarget;
1329 
1330  return true;
1331 }
1332 
1333 static File *traverseForInvoke(File *pFile)
1334 {
1335  // Do symlink traversal.
1336  while (pFile && pFile->isSymlink())
1337  {
1338  pFile = Symlink::fromFile(pFile)->followLink();
1339  }
1340  if (!pFile)
1341  {
1342  PS_NOTICE("PosixSubsystem::invoke: symlink traversal failed");
1343  SYSCALL_ERROR(DoesNotExist);
1344  return 0;
1345  }
1346 
1347  // Check for directory.
1348  if (pFile->isDirectory())
1349  {
1350  PS_NOTICE("PosixSubsystem::invoke: target is a directory");
1351  SYSCALL_ERROR(IsADirectory);
1352  return 0;
1353  }
1354 
1355  return pFile;
1356 }
1357 
1359  const char *name, Vector<String> &argv, Vector<String> &env,
1360  SyscallState *state)
1361 {
1362  // Save the original name before we trash the old stack.
1363  String originalName(name);
1364 
1365  // Try and find the target file we want to invoke.
1366  File *originalFile = findFileWithAbiFallbacks(originalName);
1367  if (!originalFile)
1368  {
1369  PS_NOTICE(
1370  "PosixSubsystem::invoke: could not find file '" << originalName << "'");
1371  SYSCALL_ERROR(DoesNotExist);
1372  return false;
1373  }
1374 
1375  return invoke(originalFile, originalName, argv, env, state);
1376 }
1377 
1379  File *originalFile, const String &originalName, Vector<String> &argv,
1380  Vector<String> &env)
1381 {
1382  return invoke(originalFile, originalName, argv, env, 0);
1383 }
1384 
1386  File *originalFile, const String &originalName, Vector<String> &argv,
1387  Vector<String> &env, SyscallState &state)
1388 {
1389  return invoke(originalFile, originalName, argv, env, &state);
1390 }
1391 
// Core of invoke: loads originalFile (and the ELF interpreter it names, if
// any) into the current process and starts it.
//
// In order, the code below: refuses to run if the process has more than one
// thread; resolves symlinks; if the file does not validate as ELF, tries a
// shebang line and switches to its target; checks read+execute access;
// inhibits events 0..31; locates the interpreter; clears the process'
// address-space allocators; maps and loads both images; resets per-process
// state; maps the VDSO and the vsyscall page; builds the initial stack
// (strings, aux vector, env pointers, argv pointers, argc); then either
// creates a detached thread (state is null) or jumps to userspace.
//
// originalName is used for logging, as the process description and as the
// AT_EXECFN string. argv may have been extended by parseShebang.
//
// Returns false on the failure paths below and true after creating the new
// thread when state is null. With a non-null state the final jump is not
// expected to return (see the trailing "unreachable" comment).
// NOTE(review): the first line of the signature and several statement lines
// are absent from this listing, so some calls below appear without their
// opening line - consult the real source before editing.
1393  File *originalFile, const String &originalName, Vector<String> &argv,
1394  Vector<String> &env, SyscallState *state)
1395 {
1396  PS_NOTICE("PosixSubsystem::invoke(" << originalName << ")");
1397 
1398  Process *pProcess =
1399  Processor::information().getCurrentThread()->getParent();
1400  PosixSubsystem *pSubsystem =
1401  reinterpret_cast<PosixSubsystem *>(pProcess->getSubsystem());
1402 
1403  // Grab the thread we're going to return into - need to tweak it.
1404  Thread *pThread = pProcess->getThread(0);
1405 
1406  // Ensure we only have one thread running (us).
1407  if (pProcess->getNumThreads() > 1)
1408  {
1410  PS_NOTICE("invoke attempted with multiple threads in this process");
1411  return false;
1412  }
1413 
1414  originalFile = traverseForInvoke(originalFile);
1415  if (!originalFile)
1416  {
1417  // traverseForInvoke does a SYSCALL_ERROR for us
1418  return false;
1419  }
1420 
  // Read the first bytes of the file so the ELF header can be validated.
1421  uint8_t validateBuffer[128];
1422  size_t nBytes =
1423  originalFile->read(0, 128, reinterpret_cast<uintptr_t>(validateBuffer));
1424 
  // NOTE(review): no delete of validElf is visible anywhere in this function
  // - looks like a leak on every path; confirm.
1425  Elf *validElf = new Elf();
1426  if (!validElf->validate(validateBuffer, nBytes))
1427  {
1428  PS_NOTICE(
1429  "PosixSubsystem::invoke: '"
1430  << originalFile->getName() << "' is not an ELF binary, looking for shebang...");
1431 
  // parseShebang leaves shebangFile null when the file has no shebang line.
1432  File *shebangFile = 0;
1433  if (!parseShebang(originalFile, shebangFile, argv))
1434  {
1435  PS_NOTICE(
1436  "PosixSubsystem::invoke: failed to parse shebang line in '"
1437  << originalFile->getName() << "'");
1438  return false;
1439  }
1440 
1441  // Switch to the real target if we must; parseShebang adjusts argv for
1442  // us.
1443  if (shebangFile)
1444  {
1445  originalFile = shebangFile;
1446 
1447  // Handle symlinks in shebang target.
1448  originalFile = traverseForInvoke(originalFile);
1449  if (!originalFile)
1450  {
1451  return false;
1452  }
1453  }
1454  }
1455 
1456  // Can we read & execute the given target?
1457  if (!VFS::checkAccess(originalFile, true, false, true))
1458  {
1459  // checkAccess does a SYSCALL_ERROR for us.
  // NOTE(review): every other failure path returns false; if this function
  // returns bool, -1 converts to true and reports success - confirm.
1460  return -1;
1461  }
1462 
1463  File *interpreterFile = 0;
1464 
1465  // Inhibit all signals from coming in while we trash the address space...
  // NOTE(review): the failure returns after this point do not visibly
  // re-enable these events; only the non-null-state path at the end does.
1466  for (int sig = 0; sig < 32; sig++)
1467  Processor::information().getCurrentThread()->inhibitEvent(sig, true);
1468 
1469  // Determine if the target uses an interpreter or not.
1470  String interpreter("");
1471  DynamicLinker *pLinker = new DynamicLinker();
1472  pProcess->setLinker(pLinker);
1473  if (pLinker->checkInterpreter(originalFile, interpreter))
1474  {
1475  // Ensure we can actually find the interpreter.
1476  interpreterFile = findFileWithAbiFallbacks(interpreter);
1477  interpreterFile = traverseForInvoke(interpreterFile);
1478  if (!interpreterFile)
1479  {
1480  PS_NOTICE(
1481  "PosixSubsystem::invoke: could not find interpreter '"
1482  << interpreter << "'");
1483  SYSCALL_ERROR(ExecFormatError);
  // NOTE(review): returns with pLinker still allocated and still set on
  // the process - confirm whether cleanup is needed here.
1484  return false;
1485  }
1486  }
1487  else
1488  {
1489  // No interpreter, just invoke the binary directly.
1491  interpreterFile = originalFile;
1492  }
1493 
1494  // No longer need the DynamicLinker instance.
1495  delete pLinker;
1496  pLinker = 0;
1497  pProcess->setLinker(pLinker);
1498 
1499  // Wipe out old address space.
1501 
1502  // We now need to clean up the process' address space.
  // Both allocators are emptied and then handed back their full ranges:
  // [user start, user reserved start) and, if present, the dynamic range.
1503  pProcess->getSpaceAllocator().clear();
1504  pProcess->getDynamicSpaceAllocator().clear();
1505  pProcess->getSpaceAllocator().free(
1506  pProcess->getAddressSpace()->getUserStart(),
1507  pProcess->getAddressSpace()->getUserReservedStart() -
1508  pProcess->getAddressSpace()->getUserStart());
1509  if (pProcess->getAddressSpace()->getDynamicStart())
1510  {
1511  pProcess->getDynamicSpaceAllocator().free(
1512  pProcess->getAddressSpace()->getDynamicStart(),
1513  pProcess->getAddressSpace()->getDynamicEnd() -
1514  pProcess->getAddressSpace()->getDynamicStart());
1515  }
1517 
1518  // Map in the two ELF files so we can load them into the address space.
  // A base of 0 is passed in by reference; presumably the mapper picks the
  // address and writes it back - confirm against MemoryMapManager.
1519  uintptr_t originalBase = 0, interpreterBase = 0;
1520  MemoryMappedObject::Permissions perms = MemoryMappedObject::Read |
1521  MemoryMappedObject::Write |
1522  MemoryMappedObject::Exec;
1524  originalFile, originalBase, originalFile->getSize(), perms);
1525  if (!pOriginal)
1526  {
1527  PS_NOTICE("PosixSubsystem::invoke: failed to map target");
1528  SYSCALL_ERROR(OutOfMemory);
1529  return false;
1530  }
1531 
1533  interpreterFile, interpreterBase, interpreterFile->getSize(), perms);
1534  if (!pInterpreter)
1535  {
1536  PS_NOTICE("PosixSubsystem::invoke: failed to map interpreter");
1537  MemoryMapManager::instance().unmap(pOriginal);
1538  SYSCALL_ERROR(OutOfMemory);
1539  return false;
1540  }
1541 
1542  // Load the target application first.
1543  uintptr_t originalLoadedAddress = 0;
1544  uintptr_t originalFinalAddress = 0;
1545  bool originalRelocated = false;
1546  if (!loadElf(
1547  originalFile, originalBase, originalLoadedAddress,
1548  originalFinalAddress, originalRelocated))
1549  {
1551  PS_NOTICE("PosixSubsystem::invoke: failed to load target");
1552  SYSCALL_ERROR(ExecFormatError);
1553  return false;
1554  }
1555 
1556  // Now load the interpreter.
  // When there is no interpreter, interpreterFile == originalFile, so the
  // same image is loaded a second time here.
1557  uintptr_t interpreterLoadedAddress = 0;
1558  uintptr_t interpreterFinalAddress = 0;
1559  bool interpreterRelocated = false;
1560  if (!loadElf(
1561  interpreterFile, interpreterBase, interpreterLoadedAddress,
1562  interpreterFinalAddress, interpreterRelocated))
1563  {
1565  PS_NOTICE("PosixSubsystem::invoke: failed to load interpreter");
1566  SYSCALL_ERROR(ExecFormatError);
1567  return false;
1568  }
1569 
1570  // Extract entry points.
1571  uintptr_t originalEntryPoint = 0, interpreterEntryPoint = 0;
1573  reinterpret_cast<uint8_t *>(originalBase), originalFile->getSize(),
1574  originalEntryPoint);
1576  reinterpret_cast<uint8_t *>(interpreterBase),
1577  interpreterFile->getSize(), interpreterEntryPoint);
1578 
  // For relocated images the extracted entry point is treated as an offset
  // from the load address.
1579  if (originalRelocated)
1580  {
1581  originalEntryPoint += originalLoadedAddress;
1582  }
1583  if (interpreterRelocated)
1584  {
1585  interpreterEntryPoint += interpreterLoadedAddress;
1586  }
1587 
1588  // Pull out the ELF header information for the original image.
1589  Elf::ElfHeader_t *originalHeader =
1590  reinterpret_cast<Elf::ElfHeader_t *>(originalBase);
1591 
1592  // Past point of no return, so set up the process for the new image.
1593  pProcess->description() = originalName;
1594  pProcess->resetCounts();
1595  pThread->resetTlsBase();
  // `true` selects only descriptors marked close-on-exec (bOnlyCloExec).
1596  if (pSubsystem)
1597  pSubsystem->freeMultipleFds(true);
  // Unwind any nested thread states.
1598  while (pThread->getStateLevel())
1599  pThread->popState();
1600 
1601  if (pProcess->getType() == Process::Posix)
1602  {
  // The saved user/group IDs become the current effective IDs.
1604  PosixProcess *p = static_cast<PosixProcess *>(pProcess);
1605  p->setSavedUserId(p->getEffectiveUserId());
1606  p->setSavedGroupId(p->getEffectiveGroupId());
1607  }
1608 
1609  // Allocate some space for the VDSO
1610  MemoryMappedObject::Permissions vdsoPerms = MemoryMappedObject::Read |
1611  MemoryMappedObject::Write |
1612  MemoryMappedObject::Exec;
1613  uintptr_t vdsoAddress = 0;
1615  vdsoAddress, __vdso_so_pages * PhysicalMemoryManager::getPageSize(),
1616  vdsoPerms);
1617  if (!pVdso)
1618  {
  // Not fatal: the VDSO aux entries are simply omitted further down.
1619  PS_NOTICE("PosixSubsystem::invoke: failed to map VDSO");
1620  }
1621  else
1622  {
1623  // All good, copy in the VDSO ELF image now.
1624  MemoryCopy(
1625  reinterpret_cast<void *>(vdsoAddress), __vdso_so, __vdso_so_len);
1626 
1627  // Readjust permissions to remove write access now that the image is
1628  // loaded.
1630  vdsoAddress, __vdso_so_pages * PhysicalMemoryManager::getPageSize(),
1631  vdsoPerms & ~MemoryMappedObject::Write);
1632  }
1633 
1634  // Map in the vsyscall space.
  // Looks up the physical page behind __posix_compat_vsyscall_base and maps
  // it at POSIX_VSYSCALL_ADDRESS, unless something is already mapped there.
1635  if (!Processor::information().getVirtualAddressSpace().isMapped(
1636  reinterpret_cast<void *>(POSIX_VSYSCALL_ADDRESS)))
1637  {
1638  physical_uintptr_t vsyscallBase = 0;
1639  size_t vsyscallFlags = 0;
1640  Processor::information().getVirtualAddressSpace().getMapping(
1641  &__posix_compat_vsyscall_base, vsyscallBase, vsyscallFlags);
1642  Processor::information().getVirtualAddressSpace().map(
1643  vsyscallBase, reinterpret_cast<void *>(POSIX_VSYSCALL_ADDRESS),
1645  }
1646 
1647  // We can now build the auxiliary vector to pass to the dynamic linker.
  // The stack is filled from its top downwards, so items pushed later end up
  // at lower addresses.
1649  Processor::information().getVirtualAddressSpace().allocateStack();
1650  uintptr_t *loaderStack = reinterpret_cast<uintptr_t *>(stack->getTop());
1651 
1652  // Top of stack = zero to mark end
1653  STACK_PUSH(loaderStack, 0);
1654 
1655  // Align to 16 byte stack
1656  STACK_ALIGN(loaderStack, 16);
1657 
1658  // Push argv/env.
  // envs[] / argvs[] record where each string landed on the new stack so the
  // pointer arrays can be pushed afterwards.
  // NOTE(review): no delete[] for envs or argvs is visible in this function.
1659  char **envs = new char *[env.count()];
1660  size_t envc = 0;
1661  for (size_t i = 0; i < env.count(); ++i)
1662  {
1663  String &str = env[i];
1664  STACK_PUSH_STRING(
1665  loaderStack, static_cast<const char *>(str), str.length() + 1);
1666  PS_NOTICE("env[" << envc << "]: " << str);
1667  envs[envc++] = reinterpret_cast<char *>(loaderStack);
1668  }
1669 
1670  // Align to 16 bytes between env and argv
1671  STACK_ALIGN(loaderStack, 16);
1672 
1673  char **argvs = new char *[argv.count()];
1674  size_t argc = 0;
1675  for (size_t i = 0; i < argv.count(); ++i)
1676  {
1677  String &str = argv[i];
1678  STACK_PUSH_STRING(
1679  loaderStack, static_cast<const char *>(str), str.length() + 1);
1680  PS_NOTICE("argv[" << argc << "]: " << str);
1681  argvs[argc++] = reinterpret_cast<char *>(loaderStack);
1682  }
1683 
1684  // Align to 16 bytes between argv and remaining strings
1685  STACK_ALIGN(loaderStack, 16);
1686 
  // Platform string, referenced by AT_PLATFORM below.
1688  STACK_PUSH_STRING(loaderStack, "x86_64", 7);
1689  void *platform = loaderStack;
1690 
  // Name of the executable, referenced by AT_EXECFN below.
1691  STACK_PUSH_STRING(loaderStack, originalName, originalName.length() + 1);
1692  void *execfn = loaderStack;
1693 
1694  // Align to 16 bytes to prepare for the auxv entries
1695  STACK_ALIGN(loaderStack, 16);
1696 
  // 16 zero bytes, referenced by AT_RANDOM below.
1698  STACK_PUSH_ZEROES(loaderStack, 16);
1699  void *random = loaderStack;
1700 
1701  // Ensure argc aligns to 16 bytes.
  // Everything pushed from here on is whole words: aux entries come in pairs,
  // then envc + 1 env words, argc + 1 argv words and one argc word. That
  // total is odd exactly when argc + envc is even, so pad in that case
  // (assumes 8-byte words - TODO confirm).
1702  if (((argc + envc) % 2) == 0)
1703  {
1704  STACK_PUSH_ZEROES(loaderStack, 8);
1705  }
1706 
1707  // Build the aux vector now.
  // STACK_PUSH2 pushes the value first and the type second, so the type sits
  // at the lower address of each pair. AT_NULL goes first because it must end
  // up last in memory.
  // NOTE(review): in the Linux auxv numbering 11/12/13/14 are
  // AT_UID/AT_EUID/AT_GID/AT_EGID. The values paired with 14 (getUserId) and
  // 11 (getEffectiveGroupId) below look swapped - confirm.
1708  STACK_PUSH2(loaderStack, 0, 0); // AT_NULL
1709  STACK_PUSH2(
1710  loaderStack, reinterpret_cast<uintptr_t>(platform), 15); // AT_PLATFORM
1711  STACK_PUSH2(
1712  loaderStack, reinterpret_cast<uintptr_t>(random), 25); // AT_RANDOM
  // Type 23 with value 0 - presumably AT_SECURE; confirm.
1713  STACK_PUSH2(loaderStack, 0, 23);
1714  STACK_PUSH2(loaderStack, pProcess->getUserId(), 14); // AT_EGID
1715  STACK_PUSH2(loaderStack, pProcess->getGroupId(), 13); // AT_GID
1716  STACK_PUSH2(loaderStack, pProcess->getEffectiveUserId(), 12); // AT_EUID
1717  STACK_PUSH2(loaderStack, pProcess->getEffectiveGroupId(), 11); // AT_UID
1718  STACK_PUSH2(
1719  loaderStack, reinterpret_cast<uintptr_t>(execfn), 31); // AT_EXECFN
1720 
1721  // Push the vDSO shared object.
1722  if (pVdso)
1723  {
1724  STACK_PUSH2(loaderStack, 0, 32); // AT_SYSINFO - not present
1725  STACK_PUSH2(loaderStack, vdsoAddress, 33); // AT_SYSINFO_EHDR
1726  }
1727 
1728  // ELF parts in the aux vector.
1729  STACK_PUSH2(loaderStack, originalEntryPoint, 9); // AT_ENTRY
1730  STACK_PUSH2(loaderStack, interpreterLoadedAddress, 7); // AT_BASE
1731  STACK_PUSH2(
1732  loaderStack, PhysicalMemoryManager::getPageSize(), 6); // AT_PAGESZ
1733  STACK_PUSH2(loaderStack, originalHeader->phnum, 5); // AT_PHNUM
1734  STACK_PUSH2(loaderStack, originalHeader->phentsize, 4); // AT_PHENT
1735  STACK_PUSH2(
1736  loaderStack, originalLoadedAddress + originalHeader->phoff,
1737  3); // AT_PHDR
1738 
1739  // env
  // NOTE(review): pointers are pushed in ascending index order, so the env
  // array ends up reversed in memory, unlike argv below - confirm intent.
1740  STACK_PUSH(loaderStack, 0); // env[N]
1741  for (size_t i = 0; i < envc; ++i)
1742  {
1743  STACK_PUSH(loaderStack, reinterpret_cast<uintptr_t>(envs[i]));
1744  }
1745 
1746  // argv
  // Pushed last-to-first so argv[0] ends up at the lowest address.
1747  STACK_PUSH(loaderStack, 0); // argv[N]
1748  for (ssize_t i = argc - 1; i >= 0; --i)
1749  {
1750  STACK_PUSH(loaderStack, reinterpret_cast<uintptr_t>(argvs[i]));
1751  }
1752 
1753  // argc
1754  STACK_PUSH(loaderStack, argc);
1755 
1756  // We can now unmap both original objects as they've been loaded and
1757  // consumed.
1758  MemoryMapManager::instance().unmap(pInterpreter);
1759  MemoryMapManager::instance().unmap(pOriginal);
1760  pInterpreter = pOriginal = 0;
1761 
1762  // Initialise the sigret if not already done for this process
1763  pedigree_init_sigret();
1764  // pedigree_init_pthreads();
1765 
1767  pProcess->recordTime(true);
1768 
1769  if (!state)
1770  {
1771  // Just create a new thread, this is not a full replace.
  // NOTE(review): the events inhibited earlier are not re-enabled on this
  // path - confirm that is intended.
1772  Thread *pNewThread = new Thread(
1773  pProcess,
1774  reinterpret_cast<Thread::ThreadStartFunc>(interpreterEntryPoint), 0,
1775  loaderStack);
1776  pNewThread->detach();
1777 
1778  return true;
1779  }
1780  else
1781  {
1782  // This is a replace and requires a jump to userspace.
  // Reset the thread's saved scheduler state to all zeroes first.
1783  SchedulerState s;
1784  ByteSet(&s, 0, sizeof(s));
1785  pThread->state() = s;
1786 
1787  // Allow signals again now that everything's loaded
1788  for (int sig = 0; sig < 32; sig++)
1789  {
1790  Processor::information().getCurrentThread()->inhibitEvent(
1791  sig, false);
1792  }
1793 
1794  // Jump to the new process.
  // Execution continues at the interpreter's entry point on the stack
  // built above.
1796  0, interpreterEntryPoint, reinterpret_cast<uintptr_t>(loaderStack));
1797  }
1798 
1799  // unreachable
1800 }
void store(const K &key, const T &object)
Definition: LruCache.h:78
virtual bool kill(KillReason killReason, Thread *pThread)
virtual ~PosixSubsystem()
void release()
Definition: Spinlock.cc:273
File * find(const String &path, File *pStartNode=0)
Definition: VFS.cc:243
Iterator end()
Definition: Tree.h:348
void pushBack(const T &value)
Definition: List.h:232
void clear()
Definition: Tree.h:305
T popBack()
Definition: Vector.h:286
Iterator begin()
Definition: Vector.h:148
void unexpectedExit()
Definition: Thread.cc:754
virtual void release()
Tree< size_t, PosixThreadKey * > m_ThreadData
Abi getAbi() const
size_t count() const
Definition: Vector.h:264
void popState(bool clean=true)
Definition: Thread.cc:444
Iterator erase(Iterator &Iter)
Definition: List.h:343
Thread * m_pAcquiredThread
virtual void getMapping(void *virtualAddress, physical_uintptr_t &physicalAddress, size_t &flags)=0
size_t getId()
Definition: Process.h:108
int getExitStatus()
Definition: Process.h:131
void freeFd(size_t fdNum)
A vector / dynamic array.
virtual bool invoke(const char *name, Vector< String > &argv, Vector< String > &env)
Tree< size_t, FileDescriptor * > m_FdMap
MemoryMappedObject * mapAnon(uintptr_t &address, size_t length, MemoryMappedObject::Permissions perms)
virtual uintptr_t getUserStart() const =0
ExtensibleBitmap m_FdBitmap
bool acquire()
Definition: UnlikelyLock.cc:53
size_t size() const
Definition: List.h:222
virtual bool isMapped(void *virtualAddress)=0
bool copyDescriptors(PosixSubsystem *pSubsystem)
virtual uintptr_t getUserReservedStart() const =0
void freeMultipleFds(bool bOnlyCloExec=false, size_t iFirst=0, size_t iLast=-1)
Definition: Mutex.h:58
virtual int64_t getUserId() const
Definition: Process.cc:320
size_t getStateLevel() const
Definition: Thread.cc:474
Tree< size_t, PosixThread * > m_Threads
void setUnwindState(UnwindType ut)
Definition: Thread.h:266
bool acquire(bool recurse=false, bool safe=true)
Definition: Spinlock.cc:43
Tree< void *, Semaphore * > m_ThreadWaiters
Definition: String.h:49
Filesystem * lookupFilesystem(const String &alias)
Definition: VFS.cc:221
virtual uintptr_t getKernelStart() const =0
virtual void threadException(Thread *pThread, ExceptionType eType)
Definition: Subsystem.cc:35
void free(T address, T length, bool merge=true)
Definition: RangeList.h:163
static ProcessorInformation & information()
Definition: Processor.cc:45
void addFileDescriptor(size_t fd, FileDescriptor *pFd)
static void switchAddressSpace(VirtualAddressSpace &AddressSpace)
static VFS & instance()
Definition: VFS.cc:56
void setGroupId(size_t gid)
File * file
Our open file pointer.
SchedulerState & state()
Definition: Thread.cc:420
bool test(size_t n) const
void unmap(MemoryMappedObject *pObj)
Tree< size_t, SignalHandler * > m_SignalHandlers
Tree< size_t, PosixSyncObject * > m_SyncObjects
size_t setPermissions(uintptr_t base, size_t length, MemoryMappedObject::Permissions perms)
virtual bool isSymlink()
Definition: File.cc:431
void resetTlsBase()
Definition: Thread.cc:811
bool hasEvent(Event *pEvent)
Definition: Thread.cc:698
static bool checkAddress(uintptr_t addr, size_t extent, size_t flags)
Vector< Thread * > m_Threads
Definition: Process.h:437
#define WARNING(text)
Definition: Log.h:78
static bool extractEntryPoint(uint8_t *pBuffer, size_t length, uintptr_t &entry)
Definition: linker/Elf.cc:1119
bool sendEvent(Event *pEvent)
Definition: Thread.cc:529
Definition: List.h:64
ssize_t getValue()
Definition: Semaphore.cc:318
void insert(const K &key, const E &value)
Definition: Tree.h:173
SignalHandler * getSignalHandler(size_t sig)
void exit(int code) NORETURN
Memory-mapped file interface.
void allocateFd(size_t fdNum)
Process * getParent()
Definition: Process.h:150
MemoryAllocator & getDynamicSpaceAllocator()
Definition: Process.h:183
LruCache< String, File * > m_FindFileCache
LRU cache for file lookups. Many usage patterns involve something like a stat() immediately followed ...
Iterator begin()
Definition: Tree.h:326
void release(size_t n=1)
Definition: Semaphore.cc:239
#define NOTICE(text)
Definition: Log.h:74
SignalEvent * pEvent
Event for the signal handler.
virtual String getFullPath(bool bWithLabel=true)
Definition: File.cc:718
Definition: Log.h:136
void setExitStatus(int code)
Definition: Process.h:126
static void jumpUser(volatile uintptr_t *pLock, uintptr_t address, uintptr_t stack, uintptr_t p1=0, uintptr_t p2=0, uintptr_t p3=0, uintptr_t p4=0) NORETURN
#define assert(x)
Definition: assert.h:37
static MemoryMapManager & instance()
Iterator begin()
Definition: List.h:123
void recordTime(bool bUserspace)
Definition: Process.h:317
static Scheduler & instance()
Definition: Scheduler.h:48
Filesystem * m_pRootFs
static void setInterrupts(bool bEnable)
MemoryMappedObject * mapFile(File *pFile, uintptr_t &address, size_t length, MemoryMappedObject::Permissions perms, size_t offset=0, bool bCopyOnWrite=true)
size_t sig
Signal number.
size_t getNumThreads()
Definition: Process.cc:219
void set(size_t n)
virtual uint64_t read(uint64_t location, uint64_t size, uintptr_t buffer, bool bCanBlock=true) final
Definition: File.cc:116
Thread * getThread(size_t n)
Definition: Process.cc:225
String getName() const
Definition: File.cc:411
Process * getParent() const
Definition: Thread.h:181
void pushFront(const T &value)
Definition: Vector.h:293
List< PosixProcess * > Members
Definition: PosixProcess.h:67
Definition: Thread.h:54
UnlikelyLock m_SignalHandlersLock
void setSignalHandler(size_t sig, SignalHandler *handler)
virtual void threadRemoved(Thread *pThread)
VirtualAddressSpace * getAddressSpace()
Definition: Process.h:120
bool parseShebang(File *pFile, File *&outFile, Vector< String > &argv)
int fdflags
File descriptor flags (fcntl)
void returnGroupId(size_t gid)
LargeStaticString & description()
Definition: Process.h:114
Definition: Elf.h:201
virtual ProcessType getType()
Definition: Process.h:259
#define ERROR(text)
Definition: Log.h:82
virtual void revertToKernelAddressSpace()=0
bool loadElf(File *pFile, uintptr_t mappedAddress, uintptr_t &newAddress, uintptr_t &finalAddress, bool &relocated)
FileDescriptor * getFileDescriptor(size_t fd)
virtual bool isDirectory()
Definition: File.cc:436
bool validate(uint8_t *pBuffer, size_t length)
Definition: linker/Elf.cc:284
void yield()
Definition: Scheduler.cc:135
bool detach()
Definition: Thread.cc:885
Definition: Log.h:138
size_t getId()
Definition: Thread.h:210
void kill() NORETURN
Definition: Process.cc:239
void remove(const K &key)
Definition: Tree.h:242
virtual void sendSignal(Thread *pThread, int signal, bool yield=true)
An iterator applicable for many data structures.
Definition: Iterator.h:180
(b) below.
Definition: Thread.h:245
#define FATAL(text)
Definition: Log.h:89
bool tryAcquire(size_t n=1)
Definition: Semaphore.cc:223
bool checkInterpreter(File *pFile, String &actualFilename)
Definition: DynamicLinker.h:50
virtual void acquire()
Acquire full mutual exclusion for all Subsystem resources.
Definition: File.h:66
Iterator end()
Definition: List.h:135
PosixProcess * Leader
Definition: PosixProcess.h:61
UnlikelyLock m_FdLock
bool get(const K &key, T &object)
Definition: LruCache.h:55
virtual File * findFile(const String &path, File *workingDir)
void clear(size_t n)
E lookup(const K &key) const
Definition: Tree.h:192
bool isGroupIdValid(size_t gid) const
File * getCwd()
Definition: Process.h:156
Definition: Pipe.h:35
static bool checkAccess(File *pFile, bool bRead, bool bWrite, bool bExecute)
Definition: VFS.cc:448
MemoryAllocator & getSpaceAllocator()
Definition: Process.h:178
Implements a Radix Tree, a kind of Trie with compressed keys.
virtual void threadException(Thread *pThread, ExceptionType eType)
void release()
Definition: UnlikelyLock.cc:65
virtual File * getRoot() const =0